R tidyverse enable use of multiple CPU cores - r

I have a core i9 in my work computer and I was wondering is it possible to enable use of multiple CPU cores to produce ggplots more quickly? Or is ggplot more GPU dependent?
Data (df):
The can be accessed here. I wasn't able to paste it since the data is big. And I guess this dataset is necessary for the question since I want to speed up ggplot.
Sample code:
df = read.csv("path/TMean.csv")
gg = df %>%
ggplot(aes(x = year, y = tmean)) +
geom_point(aes(color = "Temperature"), size = 2, shape = 1, alpha = 0.1) +
geom_smooth(method = lm, aes(linetype = "LM"), se = FALSE, color = "red") +
scale_linetype_manual(values = 2, name = NULL) +
scale_colour_manual(values = "deepskyblue4", name = "Legend") +
theme(text = element_text(size = 16)) +
xlab("Year") +
ylab("Mean Temperature (\u00B0C)") +
ggtitle("1980-2021 Historical Change")+
guides(color = guide_legend(override.aes = list(alpha = 0.5), order = 1))
# Model and label formula and R^2
lm(tmean ~ year, data = df) -> model_df
get_formula(model_df)
scales::percent(summary(df)$r.squared, accuracy = 0.01) -> r_square
summary(r_square)$r.squared -> r_squared_df
r_squared_df = round(r_squared_df, digits = 4)
#Now we need to add the text to the plot:
gg +
geom_text(x = 1983.2, y = 30.8,
label = paste0("Formula = ", get_formula(model_df)),
color = 'red') +
geom_text(x = 1980, y = 30.4,
label = paste0("R\U00B2 = ", r_squared_df),
color = 'blue')

Related

How to directly label regression lines within plot frame (without a legend)?

Data and Previous Content
This question is a continuation of a previous question with the same data but with a slight tweak.
Question
Same as before, this is the example I am looking to achieve, with the part I want now highlighted in green:
Instead of coloring a specific regression line, now I want to add a direct label to the plot window like above. I know that by faceting the data, we can achieve this with a legend, coloring the lines, etc. We can even manually add an annotation by selecting the x and y coordinates with annotate or geom_text.
But I want something that doesn't require a legend or manually figuring out where the exact geom coordinates are. Is there a way to simply add the label to a regression line within the plot window similar to other aes functions? This is the base plot I have so far, with the label now removed and regression lines colored:
ggplot(slack.work,
aes(x=Coffee_Cups,
y=Mins_Work,
color=Month_Name))+
geom_point(alpha = .4)+
geom_smooth(method = "lm",
se = F)+
scale_colour_viridis_d()+
theme_bw()+
labs(title = "Coffee Cups x Minutes of Productivity",
subtitle = "Pearson r = .30, p < .001",
x="Cups of Coffee",
y="Minutes of Work",
color="Month")+
theme(plot.title = element_text(face = "bold",
size = 15,
family = "mono"),
plot.subtitle = element_text(face = "italic"),
legend.position = "none")
Currently, it looks like this:
But I would like for it to look something like this:
Adapting this answer to your case you could achieve your desired result by using stat="smooth" via geom_text or ggrepel::geom_text_repel. The tricky part is to get only one label for which I use an ifelse inside after_stat:
library(ggplot2)
# Levels of Month_Name.
# Needed to get the month names.
# When using after_stat only get the level number via `group`
levels_month <- levels(factor(slack.work$Month_Name))
ggplot(
slack.work,
aes(
x = Coffee_Cups,
y = Mins_Work,
group = Month_Name,
color = Month_Name == "January"
)
) +
geom_point(alpha = .4) +
geom_smooth(
data = ~subset(.x, !Month_Name == "January"),
method = "lm",
se = F
) +
geom_smooth(
data = ~subset(.x, Month_Name == "January"),
method = "lm",
se = F
) +
ggrepel::geom_text_repel(aes(label = after_stat(ifelse(x %in% range(x)[1], levels_month[group], NA_character_))),
stat = "smooth", method = "lm",
nudge_x = -.5, direction = "y") +
scale_x_continuous(expand = expansion(add = c(.5, 0), mult =.05)) +
scale_colour_manual(values = c("TRUE" = "steelblue", "FALSE" = "grey65")) +
annotate("text",
x = 3,
y = 800,
label = "January had the strongest effect on productivity.",
size = 4,
color = "steelblue"
) +
theme_bw() +
labs(
title = "Coffee Cups x Minutes of Productivity",
subtitle = "Pearson r = .30, p < .001",
x = "Cups of Coffee",
y = "Minutes of Work",
color = "Month"
) +
theme(
plot.title = element_text(
face = "bold",
size = 15,
family = "mono"
),
plot.subtitle = element_text(face = "italic")
) +
guides(color = "none")
EDIT To get rid of the segments connecting the line and the label you could add min.segment.length = Inf to geom_text_repel:
... +
ggrepel::geom_text_repel(aes(label = after_stat(ifelse(x %in% range(x)[1], levels_month[group], NA_character_))),
stat = "smooth", method = "lm", min.segment.length = Inf,
nudge_x = -.5, direction = "y") +
...

Adding subscripts and symbols to facet_wrap in ggplot2

I am trying to create a 2x2 facet plot of some weather conditions over time, and am having trouble adding a degrees symbol and a superscript to some of the facet titles.
weatherPLOT = data.frame(weather = rep(c("Soil Temperature (C)",
"Snow Depth (m)",
"Air Temperature (C)",
"Discharge (m3/sec)"), each = 366),
day = 1:366,
mean = 3, # Obvious place holders,
Lo95 = 2, # Are actual numbers in real code
Hi95 = 4)
ggplot(aes(y = mean, x = day), data = weatherPLOT) +
geom_ribbon(aes(ymin = Lo95, ymax = Hi95), alpha = 0.25) +
geom_path(size = 1) +
theme(axis.title.y = element_blank()) + xlab("Julian Day") +
facet_wrap( ~ weather, nrow = 2, ncol = 2, scales = "free")
I know the trick is to use labeller inside of facet_wrap, but I can't to make it work - I'm just looking to add a degree symbol before the (C) and make the 3 in (m3/sec) superscripted.
The easiest way to do this to change the text values themselves to appropriate symbols and use the ggtext package for the formatting.
\u00b0 is the Unicode value for the degree sign. <sup>3</sup> is the ggtext Markdown code for superscript 3. You specify that theme text should be markdown by using ggtext::element_markdown().
library(ggplot2)
weatherPLOT = data.frame(weather = rep(c("Soil Temperature (\u00b0C)",
"Snow Depth (m)",
"Air Temperature (\u00b0C)",
"Discharge (m<sup>3</sup>/sec)"), each = 366),
day = 1:366,
mean = 3, # Obvious place holders,
Lo95 = 2, # Are actual numbers in real code
Hi95 = 4)
ggplot(aes(y = mean, x = day), data = weatherPLOT) +
geom_ribbon(aes(ymin = Lo95, ymax = Hi95), alpha = 0.25) +
geom_path(size = 1) +
labs(y = "", x = "Julian Day") +
theme(strip.text = ggtext::element_markdown()) +
facet_wrap( ~ weather, nrow = 2, ncol = 2, scales = "free")
Created on 2021-08-25 by the reprex package (v2.0.0)
Another way would be the following:
weatherPLOT %>%
mutate(weather = factor(weather,
labels = c(bquote('Air Temperature'*degree*C),
"Discharge~(m^{3}/sec)",
"Snow~Depth~(m)",
bquote('Soil Temperature'*degree*C)))) %>%
ggplot(aes(y = mean, x = day)) +
geom_ribbon(aes(ymin = Lo95, ymax = Hi95), alpha = 0.25) +
geom_path(size = 1) +
theme(axis.title.y = element_blank()) + xlab("Julian Day") +
facet_wrap( ~ weather, nrow = 2, ncol = 2, scales = "free", labeller = label_parsed)

ggplot2: draw lines connecting observations in each group in faceted boxplot

I have the following dataframe:
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = as.factor(rep(c("X", "Y", "Z"), times = 30)), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2))
I want to get boxplots of X1 for each method and across the three patterns and within each complexity. I use the following plot:
ggplot(dd, aes(x = Pattern, y = X1, fill = Method)) +
facet_grid(~Complexity) +
geom_boxplot() +
theme(legend.position = 'bottom',
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(nrow=1))
which gives me the attached figure:
Fabulous. However, each observation for method A, B and C are on the same dataset (with indicator nsim) within 'X' (same for the cases within 'Y' and same for within 'Z') and I would like to link the observations (values of X1) between the three methods in each of the three patterns (but not link the three patterns because that would be meaningless).
Specifically, I want a plot as follows (with hand-drawn lines imagined to connect the different simulations IDS here):
So, I tried the following, however,I am getting the boxplots to no longer be bunched together for each x (and the x axes is now also messed up).
library(ggplot2)
ggplot(dd, aes(x = interaction(Method,Pattern), y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method), position = "identity") +
geom_line(aes(x = interaction(Method,Pattern), y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
geom_point(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
scale_x_discrete(labels = c("","X", "", "", "Y", "", "", "Z","")) + xlab("Pattern")
Which gives the following:
but the boxplots for each setting of Pattern gets separated (I would like them bunched together) and also the x-axes gets messed up (which I have somewhat of an inelegant fix). So, the most important thing I need resolved still is the space between the boxplots inside each Pattern (which I would like to be smaller) than the space between boxplots of different Patterns.
How do I fix this? Many thanks for your suggestions.
Is this what you're looking for?
ggplot(dd, aes(x = Pattern, y = X1, fill = Method)) +
geom_line(aes(group=interaction(Method,nsim)),
position = position_dodge(width = 0.8),
size = 0.1, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
geom_boxplot() +
theme(legend.position = 'bottom',
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(nrow=1)) +
theme_light()
I am not sure if there are better methods of doing this, but I did the following:
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2))
library(ggplot2)
# create dummy dataframe.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
geom_line(aes(x = dummy, y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Dark2")
which yields the following:
I would like the boxplots to be closer to each other, and would like some advice on how to do this, if anyone has any ideas. Perhaps the next step will be to write this up as a general function.

Add label in ggplot

My code with the following output (below in the picture) calculates the average price of the neighbourhood groups.
Beside the mean I also want to add the median price label. How should I add this information to the graph?
{r }
p.nbr <- ny_explor %>%
group_by(neighbourhood_group) %>%
summarise(price = round(mean(price), 2))
ggplot(ny_explor, aes(price)) +
geom_histogram(bins = 30, aes(y = ..density..), fill = "darkslategrey") +
geom_density(alpha = 0.2, fill = "darkslategrey") +
theme_bw() +
ggtitle("Distribution of price by neighbourhood groups",
subtitle = expression("With" ~'log'[10] ~ "transformation of x-axis")) +
geom_vline(data = p.nbr, aes(xintercept = price), size = 2, linetype = 3) +
geom_text(data = p.nbr,y = 1.5, aes(x = price + 1400, label = paste("Mean = ",price)), color = "saddlebrown", size = 4) +
facet_wrap(~neighbourhood_group) +
scale_x_log10()
Though it would have been easier if you could include some sample data, yet it is advised that your existing code may be modified like this, which may work. If not, please incluide some sample data
p.nbr <- ny_explor %>%
group_by(neighbourhood_group) %>%
summarise(price_mean = round(mean(price), 2),
price_median = median(price))
ggplot(ny_explor, aes(price_mean)) +
geom_histogram(bins = 30, aes(y = ..density..), fill = "darkslategrey") +
geom_density(alpha = 0.2, fill = "darkslategrey") +
theme_bw() +
ggtitle("Distribution of price by neighbourhood groups",
subtitle = expression("With" ~'log'[10] ~ "transformation of x-axis")) +
geom_vline(data = p.nbr, aes(xintercept = price_mean), size = 2, linetype = 3) +
geom_text(data = p.nbr,y = 1.5, aes(x = price_mean + 1400, label = paste("Mean = ",price_mean),
"/nMedian = ", price_median), color = "saddlebrown", size = 4) +
facet_wrap(~neighbourhood_group) +
scale_x_log10()

customize two legends inside one graph in ggplot2

I wanted to comment on the following doubt.
Using this code:
Plot<-data.frame(Age=c(0,0,0,0,0),Density=c(0,0,0,0,0),Sensitivity=c(0,0,0,0,0),inf=c(0,0,0,0,0),sup=c(0,0,0,0,0),tde=c(0,0,0,0,0))
Plot[1,]<-c(1,1,0.857,0.793,0.904,0.00209834)
Plot[2,]<-c(1,2,0.771 ,0.74,0.799,0.00348286)
Plot[3,]<-c(1,3,0.763 ,0.717,0.804,0.00577784)
Plot[4,]<-c(1,4,0.724 ,0.653,0.785,0.00504161)
Plot[5,]<-c(2,1,0.906,0.866,0.934,0.00365742)
Plot[6,]<-c(2,2,0.785 ,0.754,0.813,0.00440399)
Plot[7,]<-c(2,3,0.660,0.593,0.722,0.00542849)
Plot[8,]<-c(2,4,0.544,0.425,0.658,0.00433052)
names(Plot)<-c("Age","Mammographyc density","Sensitivity","inf","sup","tde")
Plot$Age<-c("50-59","50-59","50-59","50-59","60-69","60-69","60-69","60-69")
Plot$Density<-c("Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense","Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense")
levels(Plot$Age)<-c("50-59","60-69")
levels(Plot$Density)<-c("Almost entirely fat","Scattered fibroglandular density","Heterogeneously dense","Extremely dense")
pd <- position_dodge(0.2) #
Plot$Density <- reorder(Plot$Density, 1-Plot$Sensitivity)
ggplot(Plot, aes(x = Density, y = 100*Sensitivity, colour=Age)) +
geom_errorbar(aes(ymin = 100*inf, ymax = 100*sup), width = .1, position = pd) +
geom_line(position = pd, aes(group = Age), linetype = c("dashed")) +
geom_point(position = pd, size = 4)+
scale_y_continuous(expand = c(0, 0),name = 'Sensitivity (%)',sec.axis = sec_axis(~./5, name = 'Breast cancer detection rate (per 1000 mammograms)', breaks = c(0,5,10,15,20),
labels = c('0‰',"5‰", '10‰', '15‰', '20‰')), limits = c(0,100)) +
geom_line(position = pd, aes(x = Density, y = tde * 5000, colour = Age, group = Age), linetype = c("dashed"), data = Plot) +
geom_point(shape=18,aes(x = Density, y = tde * 5000, colour = Age, group = Age), position = pd, size = 4) +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
theme(legend.position="bottom") + guides(colour = guide_legend(), size = guide_legend(),
shape = guide_legend())
I have made the following graph,
in which the axis on the left is the scale of the circles and the axis on the right is the scale of the diamonds. The fact is that I would like to have a legend approximately like this:
But it is impossible for me, I have tried suggestions of other threads like scale_shape and different commands in guides but I have not got success. I just want to make clear the difference in what shape and color represent.
Would someone know how to help me?
Best regards,
What you should do is a panel plot to avoid the confusion of double axes:
library(dplyr)
library(tidyr)
Plot %>%
gather(measure, Result, Sensitivity, tde) %>%
ggplot(aes(x = Density, y = Result, colour=Age)) +
geom_errorbar(aes(ymin = inf, ymax = sup), width = .1, position = pd,
data = . %>% filter(measure == "Sensitivity")) +
geom_line(aes(group = Age), position = pd, linetype = "dashed") +
geom_point(position = pd, size = 4)+
# scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
scale_y_continuous(labels = scales::percent) +
facet_wrap(~measure, ncol = 1, scales = "free_y") +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
theme(legend.position="bottom")
But to do what you asked, you problem is that you have only 1 non-positional aesthetic mapped so you cannot get more than one legend. To force a second legend, you need to add a second mapping. It can be a dummy mapping that has no effect, as below we map alpha but then manually scale both levels to 100%. This solution is not advisable because, as you have done in your example of a desired legend, it is easy to mix up the mappings and have your viz tell a lie by mislabeling which points are sensitivity and which are detection rate.
ggplot(Plot, aes(x = Density, y = 100*Sensitivity, colour=Age, alpha = Age)) +
geom_errorbar(aes(ymin = 100*inf, ymax = 100*sup), width = .1, position = pd) +
geom_line(position = pd, aes(group = Age), linetype = c("dashed")) +
geom_point(position = pd, size = 4)+
scale_y_continuous(expand = c(0, 0),name = 'Sensitivity (%)',sec.axis = sec_axis(~./5, name = 'Breast cancer detection rate (per 1000 mammograms)', breaks = c(0,5,10,15,20),
labels = c('0‰',"5‰", '10‰', '15‰', '20‰')), limits = c(0,100)) +
geom_line(position = pd, aes(x = Density, y = tde * 5000, colour = Age, group = Age), linetype = c("dashed"), data = Plot) +
geom_point(shape=18,aes(x = Density, y = tde * 5000, colour = Age, group = Age), position = pd, size = 4) +
theme_light() +
scale_color_manual(name="Age (years)",values = c("50-59"= "grey55", "60-69" = "grey15")) +
scale_alpha_manual(values = c(1, 1)) +
guides(alpha = guide_legend("Sensitivity"),
color = guide_legend("Detection Rate", override.aes = list(shape = 18))) +
theme(legend.position="bottom")

Resources