Related
I have a core i9 in my work computer and I was wondering is it possible to enable use of multiple CPU cores to produce ggplots more quickly? Or is ggplot more GPU dependent?
Data (df):
The data can be accessed here. I wasn't able to paste it because the dataset is large, and I think it is needed for the question since I want to speed up ggplot.
Sample code:
# Load the temperature series; the code below uses its `year` and `tmean`
# columns.
df <- read.csv("path/TMean.csv")

# Base plot: faint open-circle points for every observation plus a dashed red
# least-squares line. Each layer maps a constant string to an aesthetic so
# that it receives its own legend entry.
gg <- ggplot(df, aes(x = year, y = tmean)) +
  geom_point(aes(color = "Temperature"), size = 2, shape = 1, alpha = 0.1) +
  geom_smooth(
    aes(linetype = "LM"),
    method = "lm", formula = y ~ x, se = FALSE, color = "red"
  ) +
  scale_linetype_manual(values = 2, name = NULL) +
  scale_colour_manual(values = "deepskyblue4", name = "Legend") +
  theme(text = element_text(size = 16)) +
  xlab("Year") +
  ylab("Mean Temperature (\u00B0C)") +
  ggtitle("1980-2021 Historical Change") +
  # Draw the legend key less transparent than the plotted points.
  guides(color = guide_legend(override.aes = list(alpha = 0.5), order = 1))

# Fit the same linear model that geom_smooth() draws, then build the labels.
model_df <- lm(tmean ~ year, data = df)

# NOTE(review): get_formula() is not defined in this snippet; presumably a
# helper that returns the fitted equation as text -- confirm before running.
formula_label <- get_formula(model_df)

# R^2 comes from the model summary (not from the data frame), rounded for
# display.
r_squared_df <- round(summary(model_df)$r.squared, digits = 4)

# Add the text to the plot. annotate() draws each label exactly once, whereas
# geom_text() with constant coordinates would redraw it for every row of `df`.
gg +
  annotate(
    "text",
    x = 1983.2, y = 30.8,
    label = paste0("Formula = ", formula_label),
    color = "red"
  ) +
  annotate(
    "text",
    x = 1980, y = 30.4,
    label = paste0("R\U00B2 = ", r_squared_df),
    color = "blue"
  )
I've been searching for the answer for two days and still can't find how to do this. The closest cases I found are here and here. But the former has no points on the plots and the latter has no answer. Without further ado, how do I add points to my legend?
This is my data:
# Yearly observations (2003-2020) together with a fitted model curve and the
# lower/upper bounds of its confidence band; one value per year in each vector.
Year <- 2003:2020

TheData <- c(
  72.6, 72.7, 72.6, 72.5, 72.4, 72.1,
  71.8, 71.7, 71.8, 72.3, 72.7, 72.9,
  73.1, 73.3, 73.8, 74.7, 75.7, 77.1
)

Lower <- c(
  72.33316, 72.05961, 71.8218, 71.62303, 71.46657, 71.35567,
  71.29362, 71.28368, 71.32915, 71.43331, 71.59947, 71.83096,
  72.13113, 72.50333, 72.95092, 73.47728, 74.08581, 74.77989
)

Upper <- c(
  73.46626, 73.24078, 73.05676, 72.91817, 72.82899, 72.79323,
  72.81489, 72.89794, 73.04639, 73.26418, 73.55528, 73.92363,
  74.37315, 74.90775, 75.53132, 76.24776, 77.06094, 77.97473
)

Model <- c(
  72.89971, 72.65020, 72.43928, 72.27060, 72.14778, 72.07445,
  72.05425, 72.09081, 72.18777, 72.34874, 72.57738, 72.87730,
  73.25214, 73.70554, 74.24112, 74.86252, 75.57337, 76.37731
)

# Assemble the columns under the same names as the vectors above.
MyDF <- data.frame(
  Year = Year,
  TheData = TheData,
  Lower = Lower,
  Upper = Upper,
  Model = Model
)
This is my code:
library("ggplot2")

# Observed points, the fitted model line and a shaded confidence band.
# The line is mapped to colour and the band to fill; the points are not mapped
# to any legend-producing aesthetic. x and y are inherited from the plot-level
# aes() unless a layer overrides them.
ggplot(MyDF, aes(x = Year, y = TheData)) +
  geom_point(size = 2.5) +
  # NOTE(review): geom_line() does not draw a fill; the `fill` mapping is kept
  # exactly as written -- confirm whether it is needed for the legend.
  geom_line(aes(y = Model, colour = "Model", fill = "Model")) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = "Confidence Interval"),
    alpha = 0.15
  ) +
  scale_colour_manual(
    name = "",
    values = c("Confidence Interval" = "transparent", "Model" = "black")
  ) +
  scale_fill_manual(
    name = "",
    values = c("Confidence Interval" = "grey12", "Model" = "transparent")
  ) +
  theme(legend.position = "bottom")
This is my plot.
If you want to get a legend you have to map on an aesthetic, e.g. you could map on the shape aes to get a legend for your points too:
library("ggplot2")

# Mapping a constant to `shape` inside aes() gives the points a legend key of
# their own, alongside the keys for the model line and the confidence band.
ggplot(MyDF, aes(x = Year, y = TheData)) +
  geom_point(aes(shape = "TheData"), size = 2.5) +
  geom_line(aes(y = Model, colour = "Model")) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = "Confidence Interval"),
    alpha = 0.15
  ) +
  scale_colour_manual(
    name = "",
    values = c("Confidence Interval" = "transparent", "Model" = "black")
  ) +
  scale_fill_manual(
    name = "",
    values = c("Confidence Interval" = "grey12", "Model" = "transparent")
  ) +
  # Blank title for the shape legend, matching the other two legends.
  labs(shape = "") +
  theme(legend.position = "bottom")
If somebody is interested to move the legend to free space on the plot there is an obvious way to do so:
# Same three layers as before, with the legend positioned inside the panel at
# relative coordinates (0.4, 0.7).
ggplot(MyDF, aes(x = Year, y = TheData)) +
  geom_point(aes(shape = "TheData"), size = 2.5) +
  geom_line(aes(y = Model, colour = "Model")) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = "Confidence Interval"),
    alpha = 0.15
  ) +
  scale_colour_manual(
    name = "",
    values = c("Confidence Interval" = "transparent", "Model" = "black")
  ) +
  scale_fill_manual(
    name = "",
    values = c("Confidence Interval" = "grey12", "Model" = "transparent")
  ) +
  labs(shape = "") +
  # A single position setting is enough: a later theme() value for
  # legend.position replaces any earlier one.
  theme(legend.position = c(.4, .7))
But the legend appears stacked:
Adding + guides(color = guide_legend(nrow = 1)) does not work:
My colleague has proposed adding legend.box = "horizontal". This code works:
# Legend placed inside the panel; legend.box = "horizontal" lays the separate
# legends (shape, colour, fill) out side by side.
ggplot(MyDF, aes(x = Year, y = TheData)) +
  geom_point(aes(shape = "TheData"), size = 2.5) +
  geom_line(aes(y = Model, colour = "Model")) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = "Confidence Interval"),
    alpha = 0.15
  ) +
  scale_colour_manual(
    name = "",
    values = c("Confidence Interval" = "transparent", "Model" = "black")
  ) +
  scale_fill_manual(
    name = "",
    values = c("Confidence Interval" = "grey12", "Model" = "transparent")
  ) +
  labs(shape = "") +
  guides(colour = guide_legend(nrow = 1)) +
  theme(
    legend.position = c(.4, .7),
    legend.box = "horizontal"
  )
The plot looks like this:
Still, I wonder why the legend appears in different boxes and how to put it together?
I am trying to overlay a bar chart with a line graph on a single plot with ggplot in R. My line graph works fine but the data are much larger than the data for the bar chart component.
How could I use an additional scale for this bar chart, or do something else that will make this look nice all in one graph?
Here is my plot code thus far:
# Example data: one row per quintile with its average loss and average factor.
chart <- data.frame(
  QuantileName = 1:5,
  AvgLoss = c(100, 500, 1000, 2500, 3000),
  AvgFactor = c(1.0, 1.1, 1.3, 1.4, 1.5)
)

# Line (AvgLoss) and bars (AvgFactor) drawn on the same y scale.
# x is mapped to the QuantileName column rather than a hard-coded 1:5 vector,
# so the positions keep following the data if rows are added or removed.
Plot <- ggplot(chart, aes(x = QuantileName)) +
  scale_x_continuous(
    name = "Quintile",
    limits = c(0, 5 + .5),
    breaks = seq(1, 5)
  ) +
  geom_line(aes(y = AvgLoss, colour = "AvgLoss")) +
  geom_bar(aes(y = AvgFactor, colour = "AvgFactor"), stat = "identity") +
  # Value labels for the line, nudged to the right of each point.
  geom_text(
    aes(y = AvgLoss, label = round(AvgLoss)),
    position = position_nudge(x = .3)
  ) +
  geom_point(aes(y = AvgLoss)) +
  ylab("AvgLoss") +
  scale_colour_manual(
    "",
    breaks = c("AvgLoss", "AvgFactor"),
    values = c("AvgLoss" = "red", "AvgFactor" = "grey")
  ) +
  ggtitle("Quintile Plot") +
  theme(plot.title = element_text(hjust = 0.5))

Plot
Thank you for any help!
Essentially, multiply your AvgFactor variable by a number
+ geom_bar(aes(y = AvgFactor*1000, colour = "AvgFactor" ), stat = "identity")
and set
+ scale_y_continuous(sec.axis = sec_axis(~ ./1000, name = "AvgFactor"))
so your plot code would look like
# Bars are rescaled by 1000 so they share the primary y axis with AvgLoss; the
# secondary axis divides by the same factor to show the original AvgFactor
# values. x is mapped to the QuantileName column rather than a hard-coded 1:5
# vector, so the positions keep following the data.
Plot <- ggplot(chart, aes(x = QuantileName)) +
  scale_x_continuous(
    name = "Quintile",
    limits = c(0, 5 + .5),
    breaks = seq(1, 5)
  ) +
  # Bars first so the line, labels and points are drawn on top of them.
  geom_bar(aes(y = AvgFactor * 1000, colour = "AvgFactor"), stat = "identity") +
  geom_line(aes(y = AvgLoss, colour = "AvgLoss")) +
  geom_text(
    aes(y = AvgLoss, label = round(AvgLoss)),
    position = position_nudge(x = .3)
  ) +
  geom_point(aes(y = AvgLoss)) +
  ylab("AvgLoss") +
  scale_colour_manual(
    "",
    breaks = c("AvgLoss", "AvgFactor"),
    values = c("AvgLoss" = "red", "AvgFactor" = "grey")
  ) +
  ggtitle("Quintile Plot") +
  theme(plot.title = element_text(hjust = 0.5)) +
  # The inverse of the * 1000 applied to the bars above.
  scale_y_continuous(sec.axis = sec_axis(~ . / 1000, name = "AvgFactor"))
However, I think it is probably more elegant to avoid secondary axes whenever possible.
It may be useful to know that geom_col(...) is shorthand for geom_bar(..., stat = 'identity')
I would like to change the colour of one of my ggrepel labels to black. I have tried to override the inheritance by specifying ...geom_text_repel(...colour='black') but that doesn't seem to work.
My attempt at a fix to the problem is in the second geom_text_repel function (below).
N.B. If there is a way to control the colour of individual geom_text_repel elements, rather than having to call the function twice, I would prefer that.
library("tidyverse")
library("ggthemes")
library("ggrepel")

# Reshape `economics` to long format once, then pick one series per data frame.
econ_long <- economics %>%
  gather(variable_name, observation, -date) %>%
  rename(period = date)

df1 <- filter(econ_long, variable_name == "psavert")
df2 <- filter(econ_long, variable_name == "uempmed")

# df1 is drawn with the mapped colour, df2 as a fixed black line; each series
# is labelled at its most recent observation.
ggplot(df1, aes(x = period, y = observation, colour = variable_name)) +
  geom_line() +
  geom_line(data = df2, colour = "black", size = .8) +
  geom_text_repel(
    aes(label = variable_name),
    data = subset(df1, period == max(as.Date(period, "%Y-%m-%d"))),
    size = 3,
    nudge_x = 45,
    segment.color = "grey80"
  ) +
  geom_text_repel(
    aes(label = variable_name, colour = "black"), #How do I set the colour of the label text to black?
    data = subset(df2, period == max(as.Date(period, "%Y-%m-%d"))),
    size = 3,
    nudge_x = 45,
    segment.color = "grey80"
  ) +
  scale_x_date(limits = c(min(df1$period), max(df1$period) + 1200)) +
  scale_y_continuous(labels = scales::comma) +
  scale_color_tableau() +
  scale_fill_tableau() +
  labs(x = "", y = "", title = "Economic Data") +
  theme_minimal(base_size = 16) +
  theme(legend.position = "none")
Do the same thing you did in your geom_line() layer. You want to set a color, not a mapping. Make colour = 'black' an argument to geom_text_repel(), not aes().
# A fixed colour is a layer argument, not an aes() mapping: anything placed
# inside aes() is treated as data and sent through the colour scale.
ggplot(df1, aes(x = period, y = observation, colour = variable_name)) +
  geom_line() +
  geom_line(data = df2, colour = "black", size = .8) + # just like this layer
  geom_text_repel(
    data = subset(df1, period == max(as.Date(period, "%Y-%m-%d"))),
    aes(label = variable_name),
    size = 3,
    nudge_x = 45,
    segment.color = "grey80"
  ) +
  geom_text_repel(
    data = subset(df2, period == max(as.Date(period, "%Y-%m-%d"))),
    # The comma must come before the comment, otherwise the call does not parse.
    aes(label = variable_name), # don't assign it here
    size = 3,
    nudge_x = 45,
    segment.color = "grey80",
    colour = "black" # assign it here
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal(base_size = 16) +
  scale_color_tableau() +
  scale_fill_tableau() +
  theme(legend.position = "none") +
  labs(x = "", y = "", title = "Economic Data") +
  scale_x_date(limits = c(min(df1$period), max(df1$period) + 1200))
Note that the first line AND its text are now both set manually to "black", so the automatic colour assignment will start over with the next line (and text). If you want to set that manually to a different color, you can use the same strategy (set it as an argument to the geom, not as an argument to aes()).
I wanted to comment on the following doubt.
Using this code:
library("ggplot2")

# Mammographic density categories, in the order used for the x axis.
density_levels <- c(
  "Almost entirely fat",
  "Scattered fibroglandular density",
  "Heterogeneously dense",
  "Extremely dense"
)

# One row per age group x density category. Age and Density are built as real
# factors with explicit levels (assigning levels() to a character vector does
# not create a factor), and all eight rows are created in a single call.
#   Sensitivity, inf, sup: proportions; inf and sup are used as the error-bar
#                          bounds around Sensitivity.
#   tde: plotted on the secondary axis as the detection rate.
Plot <- data.frame(
  Age = factor(rep(c("50-59", "60-69"), each = 4), levels = c("50-59", "60-69")),
  Density = factor(rep(density_levels, times = 2), levels = density_levels),
  Sensitivity = c(0.857, 0.771, 0.763, 0.724, 0.906, 0.785, 0.660, 0.544),
  inf = c(0.793, 0.74, 0.717, 0.653, 0.866, 0.754, 0.593, 0.425),
  sup = c(0.904, 0.799, 0.804, 0.785, 0.934, 0.813, 0.722, 0.658),
  tde = c(
    0.00209834, 0.00348286, 0.00577784, 0.00504161,
    0.00365742, 0.00440399, 0.00542849, 0.00433052
  )
)

# Small horizontal offset so the two age groups do not overlap.
pd <- position_dodge(0.2)

# Order the density categories by decreasing mean sensitivity.
Plot$Density <- reorder(Plot$Density, 1 - Plot$Sensitivity)

# Circles: sensitivity in percent (left axis) with error bars.
# Diamonds (shape 18): tde * 5000 on the left axis; the right axis divides by
# 5, so it reads in units of tde * 1000.
ggplot(Plot, aes(x = Density, y = 100 * Sensitivity, colour = Age)) +
  geom_errorbar(
    aes(ymin = 100 * inf, ymax = 100 * sup),
    width = .1, position = pd
  ) +
  geom_line(aes(group = Age), position = pd, linetype = "dashed") +
  geom_point(position = pd, size = 4) +
  scale_y_continuous(
    name = "Sensitivity (%)",
    limits = c(0, 100),
    expand = c(0, 0),
    sec.axis = sec_axis(
      ~ . / 5,
      name = "Breast cancer detection rate (per 1000 mammograms)",
      breaks = c(0, 5, 10, 15, 20),
      labels = c("0‰", "5‰", "10‰", "15‰", "20‰")
    )
  ) +
  geom_line(
    aes(y = tde * 5000, group = Age),
    position = pd, linetype = "dashed"
  ) +
  geom_point(
    aes(y = tde * 5000, group = Age),
    shape = 18, position = pd, size = 4
  ) +
  theme_light() +
  scale_color_manual(
    name = "Age (years)",
    values = c("50-59" = "grey55", "60-69" = "grey15")
  ) +
  theme(legend.position = "bottom") +
  guides(
    colour = guide_legend(),
    size = guide_legend(),
    shape = guide_legend()
  )
I have made the following graph,
in which the axis on the left is the scale of the circles and the axis on the right is the scale of the diamonds. The fact is that I would like to have a legend approximately like this:
But it is impossible for me, I have tried suggestions of other threads like scale_shape and different commands in guides but I have not got success. I just want to make clear the difference in what shape and color represent.
Would someone know how to help me?
Best regards,
What you should do is a panel plot to avoid the confusion of double axes:
library(dplyr)
library(tidyr)

# Stack the two measures into a single `Result` column, keyed by `measure`, so
# that each measure can be drawn in its own facet with its own y scale.
plot_long <- gather(Plot, measure, Result, Sensitivity, tde)

ggplot(plot_long, aes(x = Density, y = Result, colour = Age)) +
  # Error bars only for the Sensitivity rows: the layer's data is given as a
  # function that filters the plot data.
  geom_errorbar(
    aes(ymin = inf, ymax = sup),
    data = function(d) filter(d, measure == "Sensitivity"),
    width = .1,
    position = pd
  ) +
  geom_line(aes(group = Age), position = pd, linetype = "dashed") +
  geom_point(position = pd, size = 4) +
  # Alternative y scale, left disabled:
  # scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(
    name = "Age (years)",
    values = c("50-59" = "grey55", "60-69" = "grey15")
  ) +
  facet_wrap(~measure, ncol = 1, scales = "free_y") +
  theme_light() +
  theme(legend.position = "bottom")
But to do what you asked: your problem is that you have only 1 non-positional aesthetic mapped, so you cannot get more than one legend. To force a second legend, you need to add a second mapping. It can be a dummy mapping that has no effect; below we map alpha but then manually scale both levels to 100%. This solution is not advisable because, as you have done in your example of a desired legend, it is easy to mix up the mappings and have your viz tell a lie by mislabeling which points are sensitivity and which are detection rate.
# Dual-axis version with two legends. `alpha` is mapped to Age only to create a
# second legend; both alpha levels are then set to 1, so it has no visual
# effect on the layers.
ggplot(
  Plot,
  aes(x = Density, y = 100 * Sensitivity, colour = Age, alpha = Age)
) +
  geom_errorbar(
    aes(ymin = 100 * inf, ymax = 100 * sup),
    width = .1, position = pd
  ) +
  geom_line(aes(group = Age), position = pd, linetype = "dashed") +
  geom_point(position = pd, size = 4) +
  scale_y_continuous(
    name = "Sensitivity (%)",
    limits = c(0, 100),
    expand = c(0, 0),
    sec.axis = sec_axis(
      ~ . / 5,
      name = "Breast cancer detection rate (per 1000 mammograms)",
      breaks = c(0, 5, 10, 15, 20),
      labels = c("0‰", "5‰", "10‰", "15‰", "20‰")
    )
  ) +
  # Second series: tde scaled by 5000 onto the primary axis, drawn as diamonds.
  geom_line(
    aes(y = tde * 5000, group = Age),
    position = pd, linetype = "dashed"
  ) +
  geom_point(
    aes(y = tde * 5000, group = Age),
    shape = 18, position = pd, size = 4
  ) +
  theme_light() +
  scale_color_manual(
    name = "Age (years)",
    values = c("50-59" = "grey55", "60-69" = "grey15")
  ) +
  scale_alpha_manual(values = c(1, 1)) +
  # The alpha legend is titled "Sensitivity"; the colour legend is retitled
  # and its keys are drawn with the diamond shape.
  guides(
    alpha = guide_legend("Sensitivity"),
    color = guide_legend("Detection Rate", override.aes = list(shape = 18))
  ) +
  theme(legend.position = "bottom")