Related
I have multiple legends in ggplot and want to control the legend.key for different legends. Is there a way to do this?
library(tidyverse)
set.seed(12345)

# Break points of the filled contour bands.
brks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# Background grid with a random z value sampled for every (x, y) node.
fd <- expand.grid(
  x = seq(6, 16, length.out = 100),
  y = seq(6, 18, length.out = 100)
)
fd$z <- sample(x = seq(0, 1, length.out = 100), size = nrow(fd), replace = TRUE)

# Points drawn on top of the contours; `s` drives the colour legend.
df.t <- data.frame(
  s = LETTERS[1:5],
  l = c(11, 12, 8, 15, 14),
  d = c(13, 10, 7, 16, 8)
)

# Named character vector (not a data frame) so that every level of `s` is
# matched to its colour by name in scale_color_manual().
mypal <- c(A = "black", B = "red", C = "blue", D = "green", E = "yellow")

ggplot(data = fd, mapping = aes(x = x, y = y, z = z)) +
  geom_contour_filled(breaks = brks) +
  geom_point(
    data = df.t,
    mapping = aes(x = l, y = d, color = s),
    inherit.aes = FALSE,
    size = 5
  ) +
  scale_fill_manual(
    values = alpha(hcl.colors(100, "YlOrRd", rev = TRUE, alpha = 1), .99)
  ) +
  scale_color_manual(values = mypal, name = "obj") +
  # legend.key is set once in the theme, so this styling is not specific to
  # either the fill legend or the colour legend.
  theme(legend.key = element_rect(fill = "pink", colour = "darkblue"))
I am trying to add the p-values of a three-factor t-test to a ggplot, but it keeps failing with the error 'Error in FUN(X[[i]], ...) : object 'estimate' not found'. My code is as follows:
First, I ran a t-test:
# t-tests of Study_rate between the levels of `estimate`, run separately for
# every reciprocity x stimu combination, followed by p-value adjustment and
# significance symbols.
mixt.test_others <- studymix_fit %>%
  group_by(reciprocity, stimu) %>%
  t_test(Study_rate ~ estimate) %>%
  adjust_pvalue() %>%
  add_significance()

# Bracket coordinates for plotting.
# `x` must name the variable shown on the plot's x axis (reciprocity); the
# compared groups (estimate) are the dodged boxes within each x position.
# `dodge` is set to 0.9 to match position_dodge(width = 0.9) in the plot.
mixt.test_others1 <- mixt.test_others %>%
  add_xy_position(x = "reciprocity", fun = "mean_sd", dodge = 0.9)
Then I add it to ggplot
# Only x and y are mapped globally. colour/fill/group are mapped per layer so
# that add_pvalue(), whose data is the t-test table (which has no `estimate`
# column), does not inherit a mapping it cannot evaluate.
# NOTE(review): the inherited colour/fill mapping is presumably what raised
# "object 'estimate' not found" -- confirm after re-running.
ggplot(studymix_fit, aes(x = reciprocity, y = Study_rate)) +
  facet_grid(. ~ stimu) +
  geom_violin(
    aes(color = estimate, fill = estimate),
    alpha = 0.1, adjust = 1.5
  ) +
  geom_boxplot(
    aes(color = estimate, fill = estimate),
    width = 0.1, alpha = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  geom_dotplot(
    aes(color = estimate, fill = estimate),
    binaxis = "y", stackdir = "center",
    dotsize = 0.5, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  # Mean +/- 1 SD per estimate level; `group` keeps the dodging now that the
  # colour is a constant. (`width` was dropped: pointrange has no such
  # parameter.)
  stat_summary(
    aes(group = estimate),
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "pointrange",
    color = "red", alpha = 1,
    position = position_dodge(width = 0.9)
  ) +
  stat_compare_means(aes(group = estimate), method = "anova", label.y = 1.4) +
  add_pvalue(
    mixt.test_others1,
    label = "p = {p.adj.signif}",
    tip.length = 0.01,
    step.increase = 0.05,
    y.position = 1.05
  ) +
  theme_classic() +
  scale_fill_brewer(type = "div", palette = "Accent", direction = 1) +
  scale_color_brewer(type = "div", palette = "Accent", direction = 1) +
  labs(x = "Reciprocity", y = "Select Rate of Positive Reciprocity")
Here is part of my dataset:
# First five rows of the data set used above. The pasted snippet left the
# structure() call unclosed; it is completed here as a plain data frame.
studymix_fit <- structure(
  list(
    ID = c(102, 102, 102, 102, 103),
    condition = c("A", "C", "B", "D", "A"),
    Study_rate = c(1, 0, 1, 0, 0.666666667),
    reciprocity = c("PS", "NS", "PS", "NS", "PS"),
    estimate = c("PO", "PO", "NO", "NO", "PO"),
    stimu = c("subject", "subject", "subject", "subject", "subject")
  ),
  class = "data.frame",
  row.names = c(NA, -5L)
)
I checked my original data set, and it seems to have no problems.
I would like to know whether I performed the three-factor t-test correctly.
Are there any other mistakes in my code?
Please help me figure it out, thanks very much.
If anyone has similar problems, we can discuss and figure them out.
Thanks
I am using ggarrange to produce a figure with three plots. One of these plots includes fill and a pattern created using ggpattern. I am able to create a combined figure when I combine one standard ggplot and the ggpattern plot, but I get an error (Error in seq.default(from, to, by) : invalid '(to - from)/by') when I try to combine all 3. I've included a simplified example below.
# fake data
# Ten rows: groups A-E, each listed once with Condition "no" and once with
# "yes"; the first five rows are Type "T", the last five Type "F".
data <- structure(
  list(
    Level = c(0.2, 0.3, 0.25, 0.35, 0.4, 0.5, 0.5, 0.6, 0.15, 0.35),
    Group = rep(c("A", "B", "C", "D", "E"), each = 2),
    Condition = rep(c("no", "yes"), times = 5),
    Hx = c(0, 1, 1, 1, 0, 1, 0, 0, 0, 0),
    Type = rep(c("T", "F"), each = 5)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
# create plots
# Dodged bars faceted by Type: the bar pattern encodes Condition and the bar
# fill encodes Hx. Each legend overrides the other aesthetic so the keys show
# only the property they describe.
pattern_plot <- ggplot(data = data, mapping = aes(x = Group, y = Level)) +
  facet_grid(cols = vars(Type)) +
  geom_bar_pattern(
    mapping = aes(pattern = Condition, fill = as.factor(Hx)),
    stat = "identity",
    position = "dodge",
    color = "black",
    pattern_fill = "black",
    pattern_angle = 45,
    pattern_density = 0.1,
    pattern_spacing = 0.025,
    pattern_key_scale_factor = 0.6
  ) +
  scale_pattern_manual(values = c("none", "stripe")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  guides(
    pattern = guide_legend(override.aes = list(fill = "white")),
    fill = guide_legend(override.aes = list(pattern = "none"))
  ) +
  labs(
    x = "",
    y = "",
    pattern = "Condition",
    fill = "History",
    subtitle = "Percentage of people with condition"
  ) +
  theme_bw() +
  theme(legend.position = "left")
# Plain dodged bar chart, filled by Condition.
no_pattern_plot <- ggplot(
  data = data,
  mapping = aes(x = Group, y = Level, fill = Condition)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")

# Same chart, filled by Hx (as a factor) instead of Condition.
no_pattern_plot2 <- ggplot(
  data = data,
  mapping = aes(x = Group, y = Level, fill = as.factor(Hx))
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(legend.position = "left")
This works:
# Stack the pattern plot and the Hx-filled plot in two rows.
ggarrange(pattern_plot, no_pattern_plot2, nrow = 2)
To create this
plot
However ggarrange(pattern_plot, no_pattern_plot, no_pattern_plot2, nrow = 3)
produces Error in seq.default(from, to, by) : invalid '(to - from)/by'
Any ideas?
I have a Kaplan-Meier survival dataset that already contains time, survival probability values, and the lower and upper 95% CI bounds of those probabilities. I have posted a clip of my dataset below. I was hoping someone knows how to merge my two plots, normalize them, and make the lines continuous despite the missing values. I would like my final graph to look like this 2.
kmcurvetest_2[1:20, ] %>% dput()
structure(list(Time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 15, 16, 17, 18, 19, 20), Cohort1 = c(0.904255319148936,
0.898936170212766, 0.887769261266023, 0.887769261266023, 0.887769261266023,
0.87631417402388, 0.87631417402388, NA, NA, 0.87631417402388,
0.864551567661143, 0.858629981581273, 0.852708395501402, NA,
0.852708395501402, 0.846745399728665, 0.846745399728665, 0.840740113205766,
NA, 0.840740113205766), C1Lower95 = c(0.852338104650895, 0.846140749965675,
0.833054851312184, 0.833054851312184, 0.833054851312184, 0.819696863257612,
0.819696863257612, NA, NA, 0.819696863257612, 0.806043967960357,
0.799218079053227, 0.792429563598159, NA, 0.792429563598159,
0.785616930383783, 0.785616930383783, 0.778778500012501, NA,
0.778778500012501), C1Upper95 = c(0.938570469008423, 0.934312293965728,
0.92534844712446, 0.92534844712446, 0.92534844712446, 0.916056348120451,
0.916056348120451, NA, NA, 0.916056348120451, 0.906427391600421,
0.901537491012523, 0.8966168920045, NA, 0.8966168920045, 0.891638921203334,
0.891638921203334, 0.886603579837755, NA, 0.886603579837755),
Cohort2 = c(0.707462686567164, 0.692537313432835, 0.683384837924912,
0.674232362416989, 0.674232362416989, 0.668074989244231,
NA, 0.664996302657852, 0.664996302657852, 0.658781383941424,
0.652507275522934, 0.649370221313689, 0.646217938685953,
0.643065656058216, 0.630394411603867, 0.62722660049028, 0.624058789376693,
0.620890978263105, 0.617723167149518, 0.614539027112665),
C2Lower95 = c(0.655564487332025, 0.640091667602195, 0.630607727619003,
0.62114710952213, 0.62114710952213, 0.614788099004335, NA,
0.611612499799214, 0.611612499799214, 0.605202384226936,
0.598734349944198, 0.595504428845739, 0.592259587632446,
0.589017489398546, 0.576004700295779, 0.572758317180272,
0.569514623188025, 0.566273601091399, 0.56303523423295, 0.5597807789553
), C2Upper95 = c(0.753046097156017, 0.738936670959587, 0.730275198102735,
0.721591223004285, 0.721591223004285, 0.715742377703966,
NA, 0.712814219355565, 0.712814219355565, 0.706901638437748,
0.700928732359048, 0.697938428282602, 0.694932646561064,
0.691924293962202, 0.679812432812405, 0.67677809121533, 0.673741229385084,
0.670701861632804, 0.667660001811057, 0.664601682804447)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
My data set contains missing values, and I attempted to make my geom_line continuous despite the missing values using ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2))
My codes for the two plots are...
# plot cohort 1
# Rows where Cohort1 is NA are dropped first so that geom_line() connects the
# remaining points instead of breaking at the gaps.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort1), ],
  mapping = aes(x = Time, y = Cohort1)
) +
  geom_point(size = 1) +
  # Colour is mapped (not set as a constant) so that the "Cohort" colour scale
  # below applies and a legend is drawn.
  geom_line(aes(color = "Cohort1")) +
  geom_ribbon(
    aes(ymin = C1Lower95, ymax = C1Upper95),
    fill = "blue", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(name = "Cohort", values = c(Cohort1 = "blue"))
# plot cohort 2
# Rows where Cohort2 is NA are dropped first so that geom_line() connects the
# remaining points instead of breaking at the gaps.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2), ],
  mapping = aes(x = Time, y = Cohort2)
) +
  geom_point(size = 1) +
  # Colour is mapped (not set as a constant) so that the "Cohort" colour scale
  # below applies; the legend entry is labelled Cohort2 (it previously said
  # "Cohort1", copied from the first plot).
  geom_line(aes(color = "Cohort2")) +
  geom_ribbon(
    aes(ymin = C2Lower95, ymax = C2Upper95),
    fill = "red", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(name = "Cohort", values = c(Cohort2 = "red"))
Thank you I really appreciate it - I have attached the images in question for reference above!
Something like this?
library(tidyverse)

# Prepend a baseline row (the first time point with every survival / CI column
# set to 1) and shift all observed rows one time step to the right.
df1 <- df %>%
  slice(1) %>%
  mutate(across(-time, ~ 1)) %>%
  bind_rows(df %>% mutate(time = time + 1))

# Both cohorts in one panel. The colour aesthetic is mapped to the cohort
# name, and scale_color_manual() assigns the actual colours by name, so the
# legend labels cannot end up attached to the wrong line.
ggplot(df1, aes(x = factor(time), group = 1)) +
  # cohort 1
  geom_line(aes(y = C1survival, color = "Cohort1"), size = 1) +
  geom_point(aes(y = C1survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C1lower95.CI, ymax = C1upper95.CI), alpha = 0.2) +
  labs(title = "Survival cohort1", x = "Time [days]", y = "Survival [%]") +
  # cohort 2
  geom_line(aes(y = C2survival, color = "Cohort2"), size = 1) +
  geom_point(aes(y = C2survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C2lower95.CI, ymax = C2upper95.CI), alpha = 0.2) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(
    name = "Cohort",
    values = c(Cohort1 = "red", Cohort2 = "blue")
  ) +
  theme_classic() +
  theme(
    axis.line = element_line(colour = "black", size = 0.24),
    aspect.ratio = 4 / 5,
    legend.position = "bottom",
    legend.box = "horizontal"
  )
data:
structure(list(time = 0:8, C1survival = c(0.904255319, 0.89893617,
0.887769261, 0.887769261, 0.887769261, 0.876314174, 0.876314174,
0.664996303, 0.664996303), C1lower95.CI = c(0.852338105, 0.84614075,
0.833054851, 0.833054851, 0.833054851, 0.819696863, 0.819696863,
0.6116125, 0.6116125), C1upper95.CI = c(0.938570469, 0.934312294,
0.925348447, 0.925348447, 0.925348447, 0.916056348, 0.916056348,
0.712814219, 0.712814219), C2survival = c(0.707462687, 0.692537313,
0.683384838, 0.674232362, 0.674232362, 0.668074989, NA, NA, NA
), C2lower95.CI = c(0.655564487, 0.640091668, 0.630607728, 0.62114711,
0.62114711, 0.614788099, NA, NA, NA), C2upper95.CI = c(0.753046097,
0.738936671, 0.730275198, 0.721591223, 0.721591223, 0.715742378,
NA, NA, NA)), class = "data.frame", row.names = c(NA, -9L))
I am trying to align three text labels, i.e. mean, median and current value, outside the crossbars. I appreciate any help.
My Data
structure(list(variable = structure(1:10, .Label = c("GrossNetEquity",
"GrossTotalEquityPerfAttr", "LongNetEquity", "LongTotalEquity",
"NetEquity", "NetEquityPerfAttr", "NetTotalEquity", "ShortNetEquity",
"ShortTotalEquity", "TotalNetEquity"), class = "factor"), mx = c(134.5,
8.1, 95.6, 106.4, 61, 6.8, 71.6, -21.4, -24.9, 148.7), mn = c(71.1,
-4.6, 49.7, 66.2, 27, -4.1, 36.4, -46.3, -47.4, 96), avg = c(112.173148148148,
1.14814814814815, 77.7388888888889, 84.5111111111111, 43.262037037037,
1.05092592592593, 48.0694444444444, -34.4194444444444, -36.4416666666667,
120.952777777778), sd = c(14.5968093202928, 2.39877232936504,
9.87368667081958, 8.7204382695887, 7.29159953981859, 2.24405738054356,
7.05196278547511, 6.04899711056417, 5.77265751334298, 13.0003483658092
), md = c(114.15, 1.4, 77.35, 82.65, 41.45, 1.25, 46.35, -34.1,
-35.55, 119.75), firstldiff = c(82.9795295075625, -3.64939651058193,
57.9915155472497, 67.0702345719337, 28.6788379573998, -3.4371888351612,
33.9655188734942, -46.5174386655728, -47.9869816933526, 94.9520810461593
), firstlsum = c(141.366766788734, 5.94569280687823, 97.4862622305281,
101.951987650289, 57.8452361166742, 5.53904068701305, 62.1733700153947,
-22.3214502233161, -24.8963516399807, 146.953474509396), secldiff = c(68.3827201872697,
-6.04816883994697, 48.1178288764302, 58.349796302345, 21.3872384175813,
-5.68124621570476, 26.9135560880191, -52.566435776137, -53.7596392066956,
81.9517326803501), seclsum = c(155.963576109027, 8.34446513624327,
107.359948901348, 110.672425919877, 65.1368356564928, 7.78309806755661,
69.2253328008698, -16.2724531127519, -19.1236941266377, 159.953822875205
), value = c(116.1, -1.2, 88, 92.3, 58.8, -1.2, 63, -28.1, -29.3,
121.6), Criteria = c(NA, NA, "", "", "orange", "", "orange",
"orange", "", "orange")), .Names = c("variable", "mx", "mn",
"avg", "sd", "md", "firstldiff", "firstlsum", "secldiff", "seclsum",
"value", "Criteria"), row.names = c(NA, -10L), class = "data.frame")
My Code
I am trying to show the Mean, Median and Current Value in the form of bars on geom_crossbar, but I am finding it hard to align them.
# Flipped crossbars spanning mn..mx for every variable, with "|" markers and
# text labels placed at the md, avg and value columns. Layers inherit df3 and
# x = variable from the top-level call.
ggplot(data = df3, mapping = aes(x = variable, y = mn)) +
  geom_crossbar(
    mapping = aes(ymin = mn, ymax = mx, fill = Criteria),
    position = position_dodge(),
    width = 0.5,
    alpha = 0.50
  ) +
  # md marker and label
  geom_point(
    mapping = aes(y = md, group = 1),
    color = "brown1", shape = "|", size = 10
  ) +
  geom_text(
    mapping = aes(y = md, label = paste("Median", md)),
    position = position_dodge(width = 0.9),
    color = "brown1", size = 3, hjust = -1.0, vjust = 2
  ) +
  # avg marker and label (the label text prints the mn column)
  geom_point(
    mapping = aes(y = avg, group = 1),
    color = "coral4", shape = "|", size = 10
  ) +
  geom_text(
    mapping = aes(y = avg, label = paste("Mean", mn)),
    color = "coral4", size = 3, hjust = -1.0, vjust = 2.5
  ) +
  # value marker and label
  geom_point(
    mapping = aes(y = value, group = 1),
    color = "brown1", shape = "|", size = 10
  ) +
  geom_text(
    mapping = aes(y = value, label = paste("Current Value", value)),
    color = "brown1", size = 2, hjust = -1.0, vjust = 3
  ) +
  coord_flip()
If you wish to align your geom_text layers, you can assign them the same y value. I've included an example below. I also removed some repetitive parts from your code, where the different layers can inherit the data / aesthetic mappings from the top ggplot() level.
# The top-level y is mx, so the three text layers below all inherit the same
# position and line up with one another.
ggplot(data = df3, mapping = aes(x = variable, y = mx)) +
  geom_crossbar(
    mapping = aes(ymin = mn, ymax = mx, fill = Criteria),
    position = position_dodge(),
    width = 0.5,
    alpha = 0.50
  ) +
  # "|" markers at the md, avg and value columns
  geom_point(mapping = aes(y = md), color = "brown1", shape = "|", size = 10) +
  geom_point(mapping = aes(y = avg), color = "coral4", shape = "|", size = 10) +
  geom_point(mapping = aes(y = value), color = "brown1", shape = "|", size = 10) +
  # Text labels: vjust (-1 / 0 / 1) separates the three layers from one
  # another, while the shared nudge_y shifts all of them by the same amount.
  # The "Mean" label prints avg here; the question's code printed mn, which
  # appears to be the minimum rather than the mean.
  geom_text(
    mapping = aes(label = paste("Median", md)),
    color = "brown1", size = 3, hjust = 0, vjust = -1, nudge_y = 5
  ) +
  geom_text(
    mapping = aes(label = paste("Mean", avg)),
    color = "coral4", size = 3, hjust = 0, vjust = 0, nudge_y = 5
  ) +
  geom_text(
    mapping = aes(label = paste("Current Value", value)),
    color = "brown1", size = 2, hjust = 0, vjust = 1, nudge_y = 5
  ) +
  coord_flip() +
  # widen the value axis to leave room for the labels
  expand_limits(y = 200)
Note: The above follows the approach in your code, which repeats the same geom layers for different columns in the wide format data. In general, ggplot prefers to deal with data in long format. It looks cleaner, and would be easier to maintain as you only need to make changes (e.g. increase font size, change number of decimal places in the label) once, rather than repeat the change for every affected layer. A long format approach to this problem could look like this:
# create long format data frame for labels
# One row per variable x statistic: the md, avg and value columns are stacked
# into `type` (column name) and `value` (number), while `variable` and `mx`
# are kept as identifying columns. pivot_longer() replaces the superseded
# gather(); rows come out grouped by variable rather than by statistic.
df3.labels <- df3 %>%
  select(variable, mx, md, avg, value) %>%
  tidyr::pivot_longer(
    cols = c(md, avg, value),
    names_to = "type",
    values_to = "value"
  ) %>%
  mutate(
    # text shown next to each crossbar, e.g. "Median: 114.15"
    label = paste0(
      case_when(
        type == "md" ~ "Median",
        type == "avg" ~ "Mean",
        TRUE ~ "Current Value"
      ),
      ": ",
      round(value, 2)
    ),
    # per-statistic vertical justification so the three labels do not overlap
    vjust = case_when(
      type == "md" ~ -1,
      type == "avg" ~ 0,
      TRUE ~ 1
    )
  )
# place df3.labels in the top level call, since there are two geom layers that
# use it as the data source, & only one that uses df3.
ggplot(
  df3.labels,
  aes(x = variable, y = value, color = type, label = label)
) +
  # the crossbar uses the wide data and its own mapping only
  geom_crossbar(
    data = df3,
    aes(x = variable, y = mn, ymin = mn, ymax = mx, fill = Criteria),
    inherit.aes = FALSE,
    width = 0.5, alpha = 0.50
  ) +
  geom_point(shape = "|", size = 10) +
  # every label sits at y = mx; the per-row vjust column spreads them out
  geom_text(aes(y = mx, vjust = vjust), size = 3, nudge_y = 5, hjust = 0) +
  # change colour mappings here
  # guide = "none" hides the colour legend (guide = FALSE is deprecated)
  scale_color_manual(
    values = c("md" = "brown1", "avg" = "coral4", "value" = "brown1"),
    guide = "none"
  ) +
  coord_flip() +
  expand_limits(y = 200)