ggpattern customize legend / manual scale pattern - r

I want to customize my legend and the pattern shapes in ggpattern, but when I run the code two legends appear on the side. I would also like the pattern color/stripes to be black; right now they are grey even with pattern_color = "black".
# Reproducible example from the question: a single study_id (3) with three rows,
# one per additional_therapy value. Note "NA" here is the string "NA", not a
# missing value.
df <- data.frame(
study_id = c(3, 3, 3), primary_therapy = c("Si", "Si", "Si"),
additional_therapy = c("NA", "S", "V+S"), end_yr = c(0.08, 0.39, 3.03)
)
# Base swimmer plot filled by primary_therapy, with a patterned column layer
# (fill = NA on that layer) added on top.
swimmer_plot(
df = df, id = "study_id",
end = "end_yr", name_fill = "primary_therapy",
width = 0.85, color = NA
) + geom_col_pattern(aes(study_id, end_yr,
# Both pattern and pattern_angle are mapped to additional_therapy here.
pattern = additional_therapy, pattern_angle = additional_therapy),
fill = NA, na.rm=FALSE, show.legend=NA, width=0.85,
pattern_spacing = 0.01, pattern_color = "black", pattern_size = 0.5,
pattern_linetype = 0.5, pattern_orientation="vertical") +
# Manual pattern scale: one pattern name per additional_therapy value.
scale_pattern_manual(name="Additional Therapy", values = c("S"="stripe","NA"="none","V+S"="circle"))

First, with geom_col_pattern, show.legend = can only be TRUE or FALSE; NA isn't a valid option. Second, remove the pattern_angle argument from geom_col_pattern, since I think that's what's causing the second legend. Third, change pattern_color to pattern_fill to make the lines black.
library(swimplot)
library(ggpattern)
library(tidyverse)
# Example data: one subject (study_id 3) with three therapy segments.
# "NA" is a literal string level, not a missing value.
df <- data.frame(
  study_id = c(3, 3, 3),
  primary_therapy = c("Si", "Si", "Si"),
  additional_therapy = c("NA", "S", "V+S"),
  end_yr = c(0.08, 0.39, 3.03)
)

# Swimmer plot filled by primary therapy, overlaid with a column layer that
# carries only the pattern (fill = NA) keyed on additional therapy.
swimmer_plot(
  df = df,
  id = "study_id",
  end = "end_yr",
  name_fill = "primary_therapy",
  width = 0.85,
  color = NA
) +
  geom_col_pattern(
    mapping = aes(x = study_id, y = end_yr, pattern = additional_therapy),
    width = 0.85,
    fill = NA,
    pattern_fill = "black",
    pattern_orientation = "vertical",
    pattern_spacing = 0.01,
    pattern_size = 0.5,
    pattern_linetype = 0.5,
    na.rm = FALSE,
    show.legend = TRUE
  ) +
  # Only `pattern` is mapped, so this single manual scale drives the legend.
  scale_pattern_manual(
    values = c("S" = "stripe", "NA" = "none", "V+S" = "circle"),
    name = "Additional Therapy"
  )

Related

Can individual legend have different 'element.key' theme in ggplot?

I have multiple legends in ggplot and want to control the legend.key for different legends. Is there a way to do this?
library(tidyverse)
# Reproducible example: filled contours of random z values over a 100 x 100
# grid, plus five coloured points, which yields two legends (the contour fill
# levels and the point colours named "obj").
set.seed(12345)
brks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
fd <- expand.grid(
  x = seq(6, 16, length.out = 100),
  y = seq(6, 18, length.out = 100)
)
# Spell out TRUE: `T` is an ordinary variable and can be reassigned.
fd$z <- sample(x = seq(0, 1, length.out = 100), size = nrow(fd), replace = TRUE)
df.t <- data.frame(s = LETTERS[1:5], l = c(11, 12, 8, 15, 14), d = c(13, 10, 7, 16, 8))
mypal <- data.frame(A = "black", B = "red", C = "blue", D = "green", E = "yellow")

ggplot(data = fd, mapping = aes(x = x, y = y, z = z)) +
  geom_contour_filled(breaks = brks) +
  # The points use their own data and aesthetics, hence inherit.aes = FALSE.
  geom_point(
    data = df.t,
    mapping = aes(x = l, y = d, color = s),
    inherit.aes = FALSE,
    size = 5
  ) +
  scale_fill_manual(values = alpha(hcl.colors(100, "YlOrRd", rev = TRUE, alpha = 1), .99)) +
  scale_color_manual(values = alpha(mypal, 1), name = "obj") +
  # legend.key is set once at theme level in this example.
  theme(legend.key = element_rect(fill = "pink", colour = "darkblue"))

Error in FUN(X[[i]], ...) : object 'estimate' not found

I am trying to add p-values from a three-factor t-test to a ggplot, but I keep getting the error 'Error in FUN(X[[i]], ...) : object 'estimate' not found'. My code is as follows:
First, I ran a t-test:
# For each (reciprocity, stimu) group, run t_test(Study_rate ~ estimate), then
# adjust the p-values and add significance labels.
# NOTE(review): t_test/adjust_pvalue/add_significance/add_xy_position look like
# rstatix functions; the library() calls are not shown, so confirm.
mixt.test_others<-studymix_fit %>%
group_by(reciprocity,stimu)%>%
t_test(Study_rate ~ estimate)%>%
adjust_pvalue()%>%
add_significance()
# Add bracket x/y positions to the test results, using "estimate" as the x variable.
mixt.test_others1 <- mixt.test_others %>%
add_xy_position(x = "estimate", fun = "mean_sd", dodge = 0.8)
Then I add it to ggplot
# Violin + box + dot plot of Study_rate by reciprocity, coloured and filled by
# estimate and faceted by stimu, with test annotations layered on top.
ggplot(studymix_fit,aes(x=reciprocity, y=Study_rate,color = estimate,
fill = estimate))+
facet_grid(. ~ stimu)+
geom_violin(alpha = 0.1, adjust = 1.5)+
geom_boxplot(width = 0.1,alpha = 0.2,
position = position_dodge(width = 0.9))+
geom_dotplot(binaxis = "y", stackdir = 'center',
dotsize = 0.5, alpha = 0.5,
position = position_dodge(width = 0.9) )+
# Mean +/- 1 SD (mult = 1) drawn as a red pointrange.
stat_summary(fun.data="mean_sdl", fun.args = list(mult=1),
geom="pointrange",
color = "red", alpha = 1,width = 0.15,
position = position_dodge(width = 0.9))+
stat_compare_means(method = 'anova', label.y = 1.4)+
# NOTE(review): color/fill = estimate are set in the top-level aes(). If this
# layer inherits them, its data (mixt.test_others1) would need an 'estimate'
# column; that is a possible source of "object 'estimate' not found" -- confirm.
# NOTE(review): the plot's x is reciprocity, while the positions were computed
# with x = "estimate" -- confirm the brackets land where intended.
add_pvalue(mixt.test_others1,
label = "p = {p.adj.signif}",
tip.length = 0.01,
step.increase = 0.05,
y.position = 1.05)+
theme_classic()+
scale_fill_brewer(type = 'div', palette = 'Accent', direction = 1)+
scale_color_brewer(type = 'div', palette = 'Accent', direction = 1)+
labs(x="Reciprocity",y="Select Rate of Positive Reciprocity")
Here is part of my dataset:
structure(list(ID = c(102, 102, 102, 102, 103),
condition = c("A", "C", "B", "D", "A"),
Study_rate = c(1, 0, 1, 0, 0.666666667),
reciprocity = c("PS", "NS", "PS", "NS", "PS"),
estimate = c("PO", "PO", "NO", "NO", "PO"),
stimu = c("subject", "subject", "subject", "subject", "subject"))
I checked my original data set, and it does not seem to have any problems.
I would like to know whether I carried out the three-factor t-test correctly.
Are there any other mistakes in my code?
Please help me figure it out, thanks very much.
If anyone has similar problems, we can discuss and figure them out.
Thanks

ggpubr::ggarrange not arranging plot made with ggpattern

I am using ggarrange to produce a figure with three plots. One of these plots includes fill and a pattern created using ggpattern. I am able to create a combined figure when I combine one standard ggplot and the ggpattern plot, but I get an error (Error in seq.default(from, to, by) : invalid '(to - from)/by') when I try to combine all 3. I've included a simplified example below.
#fake data
data <- structure(list(Level= c(0.2, 0.3, 0.25, 0.35, 0.4, 0.5, 0.5, 0.6, 0.15, 0.35),
Group= c("A", "A", "B", "B", "C", "C", "D", "D", "E", "E"),
Condition = c("no", "yes", "no", "yes", "no", "yes", "no", "yes", "no", "yes"),
Hx = c(0,1,1,1,0,1,0,0,0,0),
Type = c("T", "T", "T", "T", "T", "F", "F", "F", "F", "F")),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
#create plots
# Dodged bar chart of Level by Group, faceted into columns by Type. The bar
# pattern is mapped to Condition and the bar fill to Hx (as a factor).
pattern_plot <-
ggplot(data = data, aes(x = Group, y = Level)) +
facet_grid(cols=vars(Type)) +
geom_bar_pattern(aes(pattern = Condition, fill = as.factor(Hx)),
stat = "identity",
position = "dodge",
color = "black",
pattern_fill = "black",
pattern_angle = 45,
pattern_density = 0.1,
pattern_spacing = 0.025,
pattern_key_scale_factor = 0.6) +
# Unnamed values: "none" and "stripe" are assigned to the Condition levels in order.
scale_pattern_manual(values = c("none", "stripe")) +
labs(x = "", y = "", pattern = "Condition", fill = "History",
subtitle = "Percentage of people with condition") +
# Keep the two legends visually separate: pattern keys get a white fill,
# fill keys get no pattern.
guides(pattern = guide_legend(override.aes = list(fill = "white")),
fill = guide_legend(override.aes = list(pattern = "none"))) +
theme_bw() +
theme(legend.position="left") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1))
# Plain (no pattern) dodged bar chart of Level by Group, filled by Condition,
# with percent y-axis labels and the legend on the left.
no_pattern_plot <-
ggplot(data = data, aes(x = Group, y = Level, fill = Condition)) +
geom_bar(position = "dodge", stat = "identity") +
theme_bw() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(legend.position="left")
# Same plain dodged bar chart as no_pattern_plot, but filled by Hx (as a factor)
# instead of Condition.
no_pattern_plot2 <- ggplot(data = data, aes(x = Group, y = Level, fill = as.factor(Hx))) +
geom_bar(position = "dodge", stat = "identity") +
theme_bw() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
theme(legend.position="left")
This works:
ggarrange(pattern_plot, no_pattern_plot2, nrow = 2)
To create this
plot
However ggarrange(pattern_plot, no_pattern_plot, no_pattern_plot2, nrow = 3)
produces Error in seq.default(from, to, by) : invalid '(to - from)/by'
Any ideas?

Plotting kaplan meier survival data if dataset already contains time , survival probability, and upper/lower 95% confidence intervals

I have a Kaplan-Meier survival dataset that already contains time, survival probability values, and survival probability data points for both the lower and upper 95% CI. I have posted a clip of my dataset below. I was hoping someone knows how to merge my two plots, normalize them, and make the lines continuous despite the missing values. I was hoping for my final graph to look like this 2.
kmcurvetest_2[1:20, ] %>% dput()
structure(list(Time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 15, 16, 17, 18, 19, 20), Cohort1 = c(0.904255319148936,
0.898936170212766, 0.887769261266023, 0.887769261266023, 0.887769261266023,
0.87631417402388, 0.87631417402388, NA, NA, 0.87631417402388,
0.864551567661143, 0.858629981581273, 0.852708395501402, NA,
0.852708395501402, 0.846745399728665, 0.846745399728665, 0.840740113205766,
NA, 0.840740113205766), C1Lower95 = c(0.852338104650895, 0.846140749965675,
0.833054851312184, 0.833054851312184, 0.833054851312184, 0.819696863257612,
0.819696863257612, NA, NA, 0.819696863257612, 0.806043967960357,
0.799218079053227, 0.792429563598159, NA, 0.792429563598159,
0.785616930383783, 0.785616930383783, 0.778778500012501, NA,
0.778778500012501), C1Upper95 = c(0.938570469008423, 0.934312293965728,
0.92534844712446, 0.92534844712446, 0.92534844712446, 0.916056348120451,
0.916056348120451, NA, NA, 0.916056348120451, 0.906427391600421,
0.901537491012523, 0.8966168920045, NA, 0.8966168920045, 0.891638921203334,
0.891638921203334, 0.886603579837755, NA, 0.886603579837755),
Cohort2 = c(0.707462686567164, 0.692537313432835, 0.683384837924912,
0.674232362416989, 0.674232362416989, 0.668074989244231,
NA, 0.664996302657852, 0.664996302657852, 0.658781383941424,
0.652507275522934, 0.649370221313689, 0.646217938685953,
0.643065656058216, 0.630394411603867, 0.62722660049028, 0.624058789376693,
0.620890978263105, 0.617723167149518, 0.614539027112665),
C2Lower95 = c(0.655564487332025, 0.640091667602195, 0.630607727619003,
0.62114710952213, 0.62114710952213, 0.614788099004335, NA,
0.611612499799214, 0.611612499799214, 0.605202384226936,
0.598734349944198, 0.595504428845739, 0.592259587632446,
0.589017489398546, 0.576004700295779, 0.572758317180272,
0.569514623188025, 0.566273601091399, 0.56303523423295, 0.5597807789553
), C2Upper95 = c(0.753046097156017, 0.738936670959587, 0.730275198102735,
0.721591223004285, 0.721591223004285, 0.715742377703966,
NA, 0.712814219355565, 0.712814219355565, 0.706901638437748,
0.700928732359048, 0.697938428282602, 0.694932646561064,
0.691924293962202, 0.679812432812405, 0.67677809121533, 0.673741229385084,
0.670701861632804, 0.667660001811057, 0.664601682804447)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
My data set contains missing values, and I attempted to make my geom_line continuous despite the missing values by using ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2)).
My codes for the two plots are...
# Cohort 1: survival curve with its 95% CI band.
# Rows where Cohort1 is NA are dropped first so geom_line connects across the
# gaps instead of breaking at them.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2[["Cohort1"]]), ],
  mapping = aes(x = Time, y = Cohort1)
) +
  geom_point(size = 1) +
  geom_line(color = "blue") +
  # x is inherited from the top-level aes; only the band limits are added here.
  geom_ribbon(
    mapping = aes(ymin = C1Lower95, ymax = C1Upper95),
    alpha = 0.2,
    fill = "blue"
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_color_discrete(labels = c("Cohort1"), name = "Cohort")
# Cohort 2: survival curve with its 95% CI band.
# Rows where Cohort2 is NA are dropped first so geom_line connects across the
# gaps instead of breaking at them.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2), ],
  mapping = aes(x = Time, y = Cohort2)
) +
  geom_point(size = 1) +
  geom_line(color = "red") +
  geom_ribbon(
    aes(x = Time, ymin = C2Lower95, ymax = C2Upper95),
    fill = "red", alpha = 0.2
  ) +
  labs(
    title = paste("Inpatient Hospitalization"),
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # Label fixed from the copy-pasted "Cohort1". Colour is set as a constant in
  # the layers above rather than mapped in aes(), so this scale only matters
  # once a colour aesthetic is mapped.
  scale_color_discrete(name = "Cohort", labels = c("Cohort2"))
Thank you I really appreciate it - I have attached the images in question for reference above!
Something like this?
library(tidyverse)
# Normalise both curves to start at 100 %: prepend a synthetic first row in
# which every survival/CI column equals 1, and shift the observed times forward
# by one step so the synthetic row sits at the original first time point.
df1 <- df %>%
  slice(1) %>%
  mutate(across(-time, ~ 1)) %>%
  bind_rows(df %>% mutate(time = time + 1))

# Both cohorts on one plot. Every layer reads the columns it needs from df1,
# so no per-layer column subsetting is required (the original subset for the
# cohort 2 points was the typo select(1:5:7) instead of select(1, 5:7)).
# The colour aesthetic is mapped to the cohort name itself. Mapping it to the
# constants "red"/"blue" and relabelling afterwards attaches the labels in
# alphabetical order of the constants ("blue" first), which put the "Cohort1"
# label on the cohort 2 line.
ggplot(df1, aes(x = factor(time), group = 1)) +
  geom_line(aes(y = C1survival, color = "Cohort1"), size = 1) +
  geom_point(aes(y = C1survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C1lower95.CI, ymax = C1upper95.CI), alpha = 0.2) +
  labs(title = paste("Survival cohort1"), x = "Time [days]", y = "Survival [%]") +
  geom_line(aes(y = C2survival, color = "Cohort2"), size = 1) +
  geom_point(aes(y = C2survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C2lower95.CI, ymax = C2upper95.CI), alpha = 0.2) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_discrete(name = "Cohort") +
  theme_classic() +
  theme(
    axis.line = element_line(colour = "black", size = 0.24),
    aspect.ratio = 4 / 5,
    legend.position = "bottom",
    legend.box = "horizontal"
  )
data:
structure(list(time = 0:8, C1survival = c(0.904255319, 0.89893617,
0.887769261, 0.887769261, 0.887769261, 0.876314174, 0.876314174,
0.664996303, 0.664996303), C1lower95.CI = c(0.852338105, 0.84614075,
0.833054851, 0.833054851, 0.833054851, 0.819696863, 0.819696863,
0.6116125, 0.6116125), C1upper95.CI = c(0.938570469, 0.934312294,
0.925348447, 0.925348447, 0.925348447, 0.916056348, 0.916056348,
0.712814219, 0.712814219), C2survival = c(0.707462687, 0.692537313,
0.683384838, 0.674232362, 0.674232362, 0.668074989, NA, NA, NA
), C2lower95.CI = c(0.655564487, 0.640091668, 0.630607728, 0.62114711,
0.62114711, 0.614788099, NA, NA, NA), C2upper95.CI = c(0.753046097,
0.738936671, 0.730275198, 0.721591223, 0.721591223, 0.715742378,
NA, NA, NA)), class = "data.frame", row.names = c(NA, -9L))

How to align labels in cross bars

I am trying to align three text labels (mean, median and current value) outside the crossbars. I appreciate any help.
My Data
structure(list(variable = structure(1:10, .Label = c("GrossNetEquity",
"GrossTotalEquityPerfAttr", "LongNetEquity", "LongTotalEquity",
"NetEquity", "NetEquityPerfAttr", "NetTotalEquity", "ShortNetEquity",
"ShortTotalEquity", "TotalNetEquity"), class = "factor"), mx = c(134.5,
8.1, 95.6, 106.4, 61, 6.8, 71.6, -21.4, -24.9, 148.7), mn = c(71.1,
-4.6, 49.7, 66.2, 27, -4.1, 36.4, -46.3, -47.4, 96), avg = c(112.173148148148,
1.14814814814815, 77.7388888888889, 84.5111111111111, 43.262037037037,
1.05092592592593, 48.0694444444444, -34.4194444444444, -36.4416666666667,
120.952777777778), sd = c(14.5968093202928, 2.39877232936504,
9.87368667081958, 8.7204382695887, 7.29159953981859, 2.24405738054356,
7.05196278547511, 6.04899711056417, 5.77265751334298, 13.0003483658092
), md = c(114.15, 1.4, 77.35, 82.65, 41.45, 1.25, 46.35, -34.1,
-35.55, 119.75), firstldiff = c(82.9795295075625, -3.64939651058193,
57.9915155472497, 67.0702345719337, 28.6788379573998, -3.4371888351612,
33.9655188734942, -46.5174386655728, -47.9869816933526, 94.9520810461593
), firstlsum = c(141.366766788734, 5.94569280687823, 97.4862622305281,
101.951987650289, 57.8452361166742, 5.53904068701305, 62.1733700153947,
-22.3214502233161, -24.8963516399807, 146.953474509396), secldiff = c(68.3827201872697,
-6.04816883994697, 48.1178288764302, 58.349796302345, 21.3872384175813,
-5.68124621570476, 26.9135560880191, -52.566435776137, -53.7596392066956,
81.9517326803501), seclsum = c(155.963576109027, 8.34446513624327,
107.359948901348, 110.672425919877, 65.1368356564928, 7.78309806755661,
69.2253328008698, -16.2724531127519, -19.1236941266377, 159.953822875205
), value = c(116.1, -1.2, 88, 92.3, 58.8, -1.2, 63, -28.1, -29.3,
121.6), Criteria = c(NA, NA, "", "", "orange", "", "orange",
"orange", "", "orange")), .Names = c("variable", "mx", "mn",
"avg", "sd", "md", "firstldiff", "firstlsum", "secldiff", "seclsum",
"value", "Criteria"), row.names = c(NA, -10L), class = "data.frame")
My Code
I am trying to show the Mean, Median and Current Value as bars on geom_crossbar, but I am finding it hard to align the labels.
# Crossbars spanning mn..mx per variable (drawn horizontally via coord_flip),
# with "|" point markers and text labels for the md, avg and value columns.
# Each geom_text is positioned at a different y (md, avg, value), so every
# label follows its own marker rather than sharing one position.
ggplot(df3,aes(variable,mn))+
geom_crossbar(aes(ymin = mn, ymax = mx,fill = Criteria),
width = 0.5,alpha = 0.50,position =position_dodge())+
geom_point(data=df3, aes(x=variable,y=md,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3, aes(x=variable, y=md, label = paste("Median",md)),
size = 3, vjust = 2,hjust = -1.0,color = "brown1",
position = position_dodge(width=0.9))+
geom_point(data=df3, aes(x=variable,y=avg,group=1),
shape = "|", size = 10,color = "coral4")+
# NOTE(review): the label pastes mn although the marker is drawn at avg --
# confirm which column the "Mean" label should show.
geom_text(data=df3, aes(x=variable, y=avg, label = paste("Mean",mn)),
size = 3, vjust = 2.5, hjust = -1.0,color ="coral4")+
geom_point(data=df3, aes(x=variable,y=value,group=1),
shape = "|", size = 10,color ="brown1")+
geom_text(data=df3,aes(x=variable, y=value,label = paste("Current Value",value)),
size = 2, vjust = 3, hjust = -1.0,color = "brown1")+
coord_flip()
If you wish to align your geom_text layers, you can assign them the same y value. I've included an example below. I also removed some repetitive parts from your code, where the different layers can inherit the data / aesthetic mappings from the top ggplot() level.
# Wide-format version: one layer per statistic.
# The top-level aes sets y = mx, so the three text layers inherit the same
# position and line up at the upper end of each crossbar.
ggplot(data = df3, mapping = aes(x = variable, y = mx)) +
  geom_crossbar(
    mapping = aes(ymin = mn, ymax = mx, fill = Criteria),
    position = position_dodge(),
    alpha = 0.50,
    width = 0.5
  ) +
  # "|" markers for the median, mean and current value
  geom_point(mapping = aes(y = md), color = "brown1", size = 10, shape = "|") +
  geom_point(mapping = aes(y = avg), color = "coral4", size = 10, shape = "|") +
  geom_point(mapping = aes(y = value), color = "brown1", size = 10, shape = "|") +
  # Labels: nudge_y shifts all three by the same amount past the bar end, while
  # the differing vjust values stack them so they do not overlap.
  # The "Mean" label shows avg (the question's code pasted mn, which appears to
  # be the minimum, while the marker itself is drawn at avg).
  geom_text(
    mapping = aes(label = paste("Median", md)),
    color = "brown1", size = 3,
    hjust = 0, vjust = -1, nudge_y = 5
  ) +
  geom_text(
    mapping = aes(label = paste("Mean", avg)),
    color = "coral4", size = 3,
    hjust = 0, vjust = 0, nudge_y = 5
  ) +
  geom_text(
    mapping = aes(label = paste("Current Value", value)),
    color = "brown1", size = 2,
    hjust = 0, vjust = 1, nudge_y = 5
  ) +
  coord_flip() +
  # widen the value axis so the labels have room
  expand_limits(y = 200)
Note: The above follows the approach in your code, which repeats the same geom layers for different columns in the wide format data. In general, ggplot prefers to deal with data in long format. It looks cleaner, and would be easier to maintain as you only need to make changes (e.g. increase font size, change number of decimal places in the label) once, rather than repeat the change for every affected layer. A long format approach to this problem could look like this:
# Long-format label data: one row per (variable, statistic), keeping mx
# alongside so every label can be positioned at the bar's upper end.
# pivot_longer() replaces the superseded gather(); the md/avg/value columns are
# stacked into a `type` key column and a `value` column.
df3.labels <- df3 %>%
  select(variable, mx, md, avg, value) %>%
  tidyr::pivot_longer(
    cols = c(md, avg, value),
    names_to = "type",
    values_to = "value"
  ) %>%
  mutate(
    label = paste0(
      case_when(
        type == "md" ~ "Median",
        type == "avg" ~ "Mean",
        TRUE ~ "Current Value"
      ),
      ": ",
      round(value, 2)
    ),
    # per-statistic vertical justification so the three labels stack
    vjust = case_when(
      type == "md" ~ -1,
      type == "avg" ~ 0,
      TRUE ~ 1
    )
  )

# df3.labels goes in the top-level call because two layers (points and text)
# use it; only the crossbar layer uses df3.
ggplot(
  df3.labels,
  aes(x = variable, y = value, color = type, label = label)
) +
  geom_crossbar(
    data = df3,
    aes(x = variable, y = mn, ymin = mn, ymax = mx, fill = Criteria),
    inherit.aes = FALSE,
    width = 0.5, alpha = 0.50
  ) +
  geom_point(shape = "|", size = 10) +
  geom_text(aes(y = mx, vjust = vjust), size = 3, nudge_y = 5, hjust = 0) +
  # change colour mappings here; guide = "none" hides the colour legend
  # (guide = FALSE is deprecated in current ggplot2)
  scale_color_manual(
    values = c("md" = "brown1", "avg" = "coral4", "value" = "brown1"),
    guide = "none"
  ) +
  coord_flip() +
  expand_limits(y = 200)

Resources