How to have sum of values sum to 1 in geom_freqpoly()? - r

As an example, we can use geom_freqpoly() to examine how hp varies by cyl in the mtcars data.
library(tidyverse)

# Frequency polygon of `hp`, one line per `cyl` level. Mapping y to `ncount`
# normalises each line so that it runs between 0 and 1.
mtcars %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, color = cyl)) +
  geom_freqpoly(aes(y = after_stat(ncount)), bins = 5)
Using after_stat(ncount), I can make each line be normalized between 0 and 1. However, is there a way to have it so that the sum of all the lines at any point is equal to 1? i.e., at any value of hp, the red, green, and blue lines add to one -- representing the estimated proportion of each cyl type at that value of hp.

This can be achieved with position = "fill", though it looks confusing with lines and is better represented as a filled geom using the same statistical transformation as geom_freqpoly:
library(tidyverse)

# Binned counts of `hp` per `cyl`, drawn as filled areas. stat_bin() is the
# same statistical transformation that geom_freqpoly() uses.
mtcars %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot() +
  aes(x = hp, fill = cyl) +
  # position = "fill" rescales the stacked groups so they sum to 1 at each x
  stat_bin(bins = 5, position = "fill", geom = "area")
Compare this to the same result using an unfilled geom_freqpoly
# The same filled position, drawn as unfilled lines for comparison.
mtcars %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, color = cyl)) +
  geom_freqpoly(bins = 5, position = "fill")
I think this is harder to follow.
Another alternative to geom_freqpoly would be geom_density, which permits more visually appealing representations of similar information:
# Density-based alternative: one filled density per `cyl`, positioned with
# "fill" and zoomed to hp between 50 and 200.
mtcars %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot(mapping = aes(x = hp, fill = cyl)) +
  geom_density(position = "fill", alpha = 0.5, color = "white", lwd = 2) +
  scale_fill_brewer(palette = "Set2") +
  coord_cartesian(xlim = c(50, 200)) +
  labs(y = "Relative density") +
  theme_minimal(base_size = 20)
Created on 2022-09-05 with reprex v2.0.2

Related

Order grouped scatterplot by mean

I am plotting a geom_point for several groups (Loc) and want in addition a line that indicates the mean of the points for each group. The groups should be ordered based on the mean of the Size for each group. I am trying to do this by reorder(Loc, Size.Mean) but it does not reorder.
# Points per location, plus a black dash at each location's mean Size.
# This is the code from the question: the reorder() call in the second layer
# is the part reported as not reordering the axis.
ggplot(data, aes(Loc, Size, color = Loc)) +
  geom_point() +
  geom_point(
    data = data %>%
      group_by(Loc) %>%
      summarise(Size.Mean = mean(Size)),
    mapping = aes(y = Size.Mean, x = reorder(Loc, Size.Mean)),
    color = "black",
    shape = "-"
  ) +
  theme_pubr(base_size = 8) +
  scale_y_continuous(trans = "log10") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
ggplot orders discrete x ticks according to their level if the variable is a factor:
library(tidyverse)

# Mean sepal length per species, largest first.
iris_means <- iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length)) %>%
  arrange(desc(mean))

# Discrete x ticks follow the factor levels, so re-level Species in the
# order of the sorted means before plotting.
iris %>%
  mutate(Species = factor(Species, levels = iris_means$Species)) %>%
  ggplot(aes(Species, Sepal.Length)) +
  geom_point() +
  # ymin == ymax == mean collapses the crossbar to a single line at the mean
  geom_crossbar(
    data = iris_means,
    mapping = aes(y = mean, ymin = mean, ymax = mean),
    color = "red"
  )
Created on 2021-09-10 by the reprex package (v2.0.1)

GGplot Color Outline

library(ggplot2)
data(mtcars)

# One bar per car, ordered by mpg and filled by number of cylinders.
ggplot(
  mtcars,
  aes(x = reorder(row.names(mtcars), mpg), y = mpg, fill = factor(cyl))
) +
  geom_col()
This plots the bars with solid fills, but what if I wish to use the same fill colors as outlines for some bars and as solid fills for others? For example, if 'am' equals 1 the bar has a solid fill, but if 'am' equals 0 then it is only outlined, like this sample:
One option to remove the fill based on a logical condition is to change those values to NA.
library(tidyverse)

d <- head(mtcars) %>%
  rownames_to_column() %>%
  mutate(
    # separate fill variable: NA (no fill) where am == 0
    # note: computed while cyl is still numeric -- don't use ifelse on a factor!
    cyl_fill = ifelse(am == 0, NA, cyl),
    # convert to factors here rather than inside ggplot (which gets messy)
    cyl = factor(cyl),
    cyl_fill = factor(cyl_fill, levels = levels(cyl))
  )

# outline color follows cyl, fill follows cyl_fill
p <- ggplot(d, aes(x = rowname, y = mpg, color = cyl, fill = cyl_fill)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# draw NA fill values in white
p + scale_fill_discrete(drop = FALSE, na.value = "white")
If you want NA fill values to be empty and omitted from the legend:
# omit the fill color of NA values
# note: drop = FALSE is still needed to keep the fill and (outline) color
# values the same
p + scale_fill_discrete(drop = FALSE, na.translate = FALSE)
You can change the color of the outline in the same way (e.g. cyl_color = ifelse(am != 0, NA, cyl)), but if you want to specify a color like white or black, it will (should) appear in the legend. You can try to hack your way around these wise defaults by plotting non-aesthetic layers behind your main layers, but it usually gets ugly:
# Fill only where am != 0 and colored outline only where am == 0, with a
# black-outlined copy of the bars drawn underneath.
head(mtcars) %>%
  rownames_to_column() %>%
  mutate(
    # build both NA-masked variables while cyl is still numeric
    cyl_fill = ifelse(am == 0, NA, cyl),
    cyl_color = ifelse(am != 0, NA, cyl),
    cyl = factor(cyl),
    cyl_fill = factor(cyl_fill, levels = levels(cyl)),
    cyl_color = factor(cyl_color, levels = levels(cyl))
  ) %>%
  ggplot() +
  aes(
    x = rowname,
    y = mpg,
    color = cyl_color,
    fill = cyl_fill
  ) +
  geom_bar(stat = "identity", color = "black") + # NON-AES LAYER FIRST
  geom_bar(stat = "identity") + # Covers up the black except where omitted
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_discrete(drop = FALSE, na.translate = FALSE) +
  scale_color_discrete(drop = FALSE, na.translate = FALSE)
You could assign the desired colors to each level of the fill and color variables. For example:
library(tidyverse)

# Fill encodes `am`, outline encodes `cyl`; both palettes are set by hand.
mtcars %>%
  rownames_to_column() %>%
  arrange(mpg) %>%
  # use the mpg-sorted row order as the factor level order for the x axis
  mutate(rowname = factor(rowname, levels = rowname)) %>%
  ggplot(
    aes(x = rowname, y = mpg, fill = factor(am), colour = factor(cyl))
  ) +
  geom_col(size = 1) +
  scale_color_manual(values = c("4" = "blue", "6" = "orange", "8" = "white")) +
  scale_fill_manual(values = c("0" = "white", "1" = "red")) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0))
Maybe we can do:
library(ggplot2)
library(dplyr)

mtcars %>%
  mutate(
    # `new` carries cyl only for am == 1 rows, `new1` only for the others;
    # rows not matched by the single case_when() condition become NA
    new = case_when(am == 1 ~ factor(cyl)),
    new1 = case_when(am != 1 ~ factor(cyl))
  ) %>%
  ggplot(
    aes(
      x = reorder(row.names(mtcars), mpg),
      y = mpg,
      fill = new,
      color = new1
    )
  ) +
  geom_col() +
  scale_fill_discrete(na.value = NA) + # similar to Devin Judge-Lord post
  theme_classic() +
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0))

naming facets in facet_wrap

I am having issues trying to name a set of plots created with the facet_wrap feature. I am specifically trying to wrap the titles onto multiple lines. I have looked into this issue extensively within stack overflow and cannot find the error that I am generating. The code is below. a2$variable is a column of character strings (for grouping purposes), a2$ma_3 and a2$ma_12 are moving averages that I am trying to plot. The error that is generated is:
Error in as.integer(n) :
cannot coerce type 'closure' to vector of type 'integer'
# Both moving averages per `variable`, one facet each.
p1 <- a2 %>%
  ggplot(aes(x = date, color = variable)) +
  geom_line(aes(y = ma_12), color = "aquamarine3", alpha = 0.5, size = .7) +
  geom_line(aes(y = ma_3), color = "gray40", alpha = 0.5, size = .7) +
  # label_wrap_gen() is passed as an unnamed argument here; this is the call
  # that raises the as.integer(n) error quoted in the question
  facet_wrap(
    ~ variable,
    ncol = 3,
    scale = "free_y",
    label_wrap_gen(width = 10)
  )
Thanks in advance.
You're close. To modify the facet_wrap labels, we use the labeller argument:
library(tibble)
library(ggplot2)

# Facet titles are wrapped by passing label_wrap_gen() via the named
# `labeller` argument of facet_wrap().
mtcars %>%
  rownames_to_column() %>%
  head() %>%
  # both layers below set a constant color of their own
  ggplot(aes(x = mpg, color = cyl)) +
  geom_point(aes(y = wt), color = "aquamarine3", alpha = 0.5, size = 5) +
  geom_point(aes(y = qsec), color = "gray40", alpha = 0.5, size = 5) +
  facet_wrap(
    ~ rowname,
    ncol = 3,
    scale = "free_y",
    labeller = label_wrap_gen(width = 10)
  )
Output:
I'd suggest formatting the variable before you send it to ggplot, like this:
library(tidyverse)

# Wrap the facet label text in the data before plotting, so facet_wrap()
# needs no labeller.
mtcars %>%
  rownames_to_column() %>%
  head() %>%
  mutate(carname = stringr::str_wrap(rowname, 10)) %>%
  # both layers below set a constant color of their own
  ggplot(aes(x = mpg, color = cyl)) +
  geom_point(aes(y = wt), color = "aquamarine3", alpha = 0.5, size = 5) +
  geom_point(aes(y = qsec), color = "gray40", alpha = 0.5, size = 5) +
  facet_wrap(~ carname, ncol = 3, scale = "free_y")

ggplot - plotting bars and lines in the same chart

I need to generate a plot with bar graph for two variables and a line with the third variable.
I can create a column graph for one variable like below
# First rows of mtcars, with the row names copied into a `car` column.
df <- head(mtcars)
df[["car"]] <- rownames(df)

ggplot(df, aes(x = car, y = disp)) +
  geom_col()
Ref this answer - I can plot two variables - disp and hp as below
library(tidyr)

df$car <- row.names(df)

# Stack disp and hp into key/value columns named `var` and `value`.
df_long <- gather(df, key = var, value = value, disp, hp)

ggplot(df_long, aes(x = car, y = value, fill = var)) +
  geom_col(position = "dodge")
I need to have a third variable qsec plotted as a line like as in the below chart - how to go about this ?
You can try:
library(tidyverse)

# some data
data <- mtcars %>%
  mutate(car = rownames(mtcars)) %>%
  slice(1:6) %>%
  select(car, disp, hp)

data %>%
  gather(key, value, -car) %>%
  group_by(car) %>%
  mutate(
    # y value for the line: per-car median of the gathered values
    qsec_value = median(value),
    # constant factor, used only to get a "qsec" legend entry
    qsec = factor("qsec")
  ) %>%
  ggplot(aes(x = car)) +
  geom_col(aes(y = value, fill = key), position = "dodge") +
  geom_point(aes(y = qsec_value, color = qsec)) +
  # group = 1 joins all cars into a single line
  geom_line(aes(y = qsec_value, color = qsec, group = 1)) +
  scale_colour_manual(name = "", values = 1) +
  theme(
    legend.position = "top",
    legend.title = element_blank()
  )
Less code, same result:
# Same plot as above, with the median computed by stat_summary() instead of
# being precomputed in the data.
data %>%
  pivot_longer(-1) %>%
  ggplot(aes(x = car)) +
  geom_col(aes(y = value, fill = name), position = "dodge") +
  # color is mapped to the constant "qsec" only to create the legend entry
  stat_summary(
    aes(y = value, group = 1, color = "qsec"),
    fun = "median",
    geom = "point"
  ) +
  stat_summary(
    aes(y = value, group = 1, color = "qsec"),
    fun = "median",
    geom = "line"
  ) +
  scale_colour_manual(name = "", values = 1)
You need another layer, and because geom_line is for continuous data, you need to treat your x-values as continuous for the line layer. Doing so makes the order of the data crucial, so you also have to sort it:
# Bars for disp/hp and a line for qsec. The line's x values are row
# positions, so the data is sorted by car first to line them up with the
# discrete axis.
gather(df, key = var, value = value, disp, hp, qsec) %>%
  arrange(car) %>%
  {
    print(
      ggplot() +
        geom_bar(
          stat = "identity",
          position = "dodge",
          data = filter(., var != "qsec"),
          mapping = aes(x = car, y = value, fill = var)
        ) +
        # seq_along() instead of 1:length(): safe when no qsec rows remain
        geom_line(
          mapping = aes(x = seq_along(car), y = value),
          data = filter(., var == "qsec")
        )
    )
  }
Edit:
btw, you can check the correct order of qsec to the respective x-value by calling plotly::ggplotly(), then you can read the values better and compare them to the df, because they will show up if you point on the element...

ggplot2 add sum to chart

Using mtcars as an example, I've produced some violin plots. I wanted to add two things to this chart:
for each group, list n
for each group, sum a third variable (e.g. wt)
I can do (1) with the geom_text code below although (n) is actually plotted on the x axis rather than off to the side.
But I can't work out how to do (2).
Any help much appreciated!
library(ggplot2)
library(gridExtra)
library(ggthemes)

result <- mtcars

# Violin + box plot of drat per gear, flipped so gear runs along the
# vertical axis.
ggplot(result, aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  ylab("drat") +
  xlab("gear") +
  coord_flip() +
  # label each gear group with its row count; the count is also used as the
  # label's y position
  geom_text(
    stat = "count",
    aes(label = after_stat(count), y = after_stat(count))
  )
You can add both of these annotations by creating them in your dataframe temporarily prior to graphing. Using the dplyr package, you can create two new columns, one with the count for each group, and one with the sum of wt for each group. This can then be piped directly into your ggplot using %>% (alternatively, you could save the new dataset and insert it into ggplot the way you have it). Then with some minor edits to your geom_text call and adding a second one, we can create the plot you want. The code looks like this:
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(magrittr)
library(dplyr)

result <- mtcars

result %>%
  group_by(gear) %>%
  # per-gear row count and total weight, repeated on every row of the group
  mutate(count = n(), sum_wt = sum(wt)) %>%
  ggplot(aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  ylab("drat") +
  xlab("gear") +
  coord_flip() +
  # the labels are drawn once per row of the data, i.e. printed over
  # themselves within each gear group
  geom_text(
    aes(
      label = paste0("n = ", count),
      x = (gear + 0.25),
      y = 4.75
    )
  ) +
  geom_text(
    aes(
      label = paste0("sum wt = ", sum_wt),
      x = (gear - 0.25),
      y = 4.75
    )
  )
The new graph looks like this:
Alternatively, if you create a summary data frame named result_sum, then you can manually add that into the geom_text calls.
result <- mtcars %>%
  mutate(gear = factor(as.character(gear)))

# One row per gear: row count and total weight.
result_sum <- result %>%
  group_by(gear) %>%
  summarise(count = n(), sum_wt = sum(wt))

ggplot(result, aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  ylab("drat") +
  xlab("gear") +
  coord_flip() +
  # gear is a factor here, so it is converted with as.numeric() before the
  # +/- 0.25 offset is applied
  geom_text(
    data = result_sum,
    aes(
      label = paste0("n = ", count),
      x = (as.numeric(gear) + 0.25),
      y = 4.75
    )
  ) +
  geom_text(
    data = result_sum,
    aes(
      label = paste0("sum wt = ", sum_wt),
      x = (as.numeric(gear) - 0.25),
      y = 4.75
    )
  )
This gives you this:
The benefit to this second method is that the text isn't bold like in the first graph. The bold effect occurs in the first graph due to the text being printed over itself for all observations in the dataframe.
Thanks to those who helped.... I used this in the end which plots the calculated values, one set of classes being text based so using vjust to position the vertical offset.
thanks again!
library(ggplot2)
library(gridExtra)
library(ggthemes)
library(dplyr) # provides %>%, group_by() and summarise() used below

results <- mtcars
# Turn 'gear' to text to simulate classes, then factorise
results$gear <- as.factor(as.character(results$gear))

# One row per gear: row count and total weight.
result_sum <- results %>%
  group_by(gear) %>%
  summarise(count = n(), sum_wt = sum(wt))

ggplot(results, aes(x = gear, y = drat, group = gear)) +
  theme_tufte(base_size = 15) +
  theme(line = element_blank()) +
  geom_violin(fill = "white") +
  geom_boxplot(fill = "black", alpha = 0.3, width = 0.1) +
  ylab("drat") +
  xlab("gear") +
  coord_flip() +
  # both labels share the same x/y; vjust provides the vertical offset
  geom_text(
    data = result_sum,
    aes(label = paste0("n = ", count), x = (gear), vjust = 0, y = 5.25)
  ) +
  geom_text(
    data = result_sum,
    aes(
      label = paste0("sum wt = ", round(sum_wt, 0)),
      x = (gear),
      vjust = -2,
      y = 5.25
    )
  )

Resources