Related
I would need some help with a Likert-scala bar chart that I created using ggplot2. Here is the data frame:
structure(list(Q4_ROLE = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L), levels = c("Civilian Analyst", "Military Analyst", "Operations/Admin Specialist"
), class = "factor"), Year = structure(c(1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L), levels = c("2021", "2022"), class = "factor"), Q20_A8 = structure(c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 3L, 4L, 5L, 3L), levels = c("1", "2", "3", "4", "5"), class = "factor"),
n = c(1L, 4L, 12L, 25L, 17L, 7L, 16L, 16L, 16L, 7L, 1L, 2L,
4L, 8L, 5L, 8L, 1L, 2L, 1L, 3L, 2L, 1L, 3L), perc = c(1.69491525423729,
6.77966101694915, 20.3389830508475, 42.3728813559322, 28.8135593220339,
11.2903225806452, 25.8064516129032, 25.8064516129032, 25.8064516129032,
11.2903225806452, 6.66666666666667, 13.3333333333333, 26.6666666666667,
53.3333333333333, 29.4117647058824, 47.0588235294118, 5.88235294117647,
11.7647058823529, 5.88235294117647, 50, 33.3333333333333,
16.6666666666667, 100), percent_answers = c(-0.0169491525423729,
-0.0677966101694915, 0.203389830508475, 0.423728813559322,
0.288135593220339, -0.112903225806452, -0.258064516129032,
0.258064516129032, 0.258064516129032, 0.112903225806452,
-0.0666666666666667, 0.133333333333333, 0.266666666666667,
0.533333333333333, -0.294117647058824, -0.470588235294118,
0.0588235294117647, 0.117647058823529, 0.0588235294117647,
0.5, 0.333333333333333, 0.166666666666667, 1), percent_answers_label = c("-2%",
"-7%", "20%", "42%", "29%", "-11%", "-26%", "26%", "26%",
"11%", "-7%", "13%", "27%", "53%", "-29%", "-47%", "6%",
"12%", "6%", "50%", "33%", "17%", "100%")), row.names = c(NA,
-23L), class = c("tbl_df", "tbl", "data.frame"))
Created on 2022-08-28 by the reprex package (v2.0.1)
I have five levels and I want them to be ordered correctly, but since I have it divergent I would need two different orderings. Using:
position_stack(reverse = TRUE)
works just fine when the plot was not divergent. I basically need the Neutral-Agree-Strong Agree to be reverse = TRUE and Strong disagree-Disagree to be reverse = FALSE so everything is in the right order on the divergent scale.
I have tried to filter with geom_col() to make 3-5 in a different direction than 1-2 but the second command overwrites my first one, making the filtering useless.
Q20_A8 is the Answer variable:
Factor w/ 5 levels "1","2","3","4","5"
count_8 %>%
ggplot(aes(x = Year, y = percent_answers, fill = Q20_A8)) +
geom_col(count_8 = filter(count_8, Q20_A8 %in% c("3","4","5")), position = position_stack(reverse = TRUE )) +
geom_col(count_8 = filter(count_8, Q20_A8 %in% c("1","2")), aes( y = percent_answers), position = position_stack(reverse = FALSE )) +
geom_text(aes(label = percent_answers_label), size = 2.4,
position = position_stack(reverse = FALSE, vjust = 0.5),
color = "black",
fontface = "bold") +
facet_wrap(~ Q4_ROLE, nrow=3) +
coord_flip() +
theme_minimal() +
theme(legend.title = element_text(size=8),
legend.key.size = unit(0.3, 'cm'),
legend.text = element_text(size = 6),
axis.title.y = element_text(vjust = +3),
legend.position="bottom") +
scale_fill_manual(name="Response:",
values=c("#C0392B","#F5B7B1","#E5E7E9", "#85C1E9", "#2874A6"),
labels=c("Strongly Disagree", "Disagree", "Neither Agree/Disagree", "Agree", "Strongly Agree")) +
xlab("") +
ylab("") +
ggtitle("Test") +
scale_y_continuous(limits = c(-0.5,1), labels = ylabs)
Any help is appreciated! Thank you.
You should define breaks in your scale_fill_manual according to the specific order and define the order of your data frame in specific column using for example fct_relevel from scales package. Also you can use only geom_bar(position="stack", stat = 'identity") instead of two calls of bars. Here is a reproducible example:
library(tidyverse)
library(scales)
count_8 %>%
group_by(Q4_ROLE, Year) %>%
mutate(Q20_A8 = fct_relevel(Q20_A8,"1","2","3","4","5")) %>%
ggplot(aes(x = Year, y = percent_answers, fill = Q20_A8)) +
geom_bar(position="stack", stat="identity") +
geom_text(aes(label = percent_answers_label), size = 2.4,
position = position_stack(reverse = FALSE, vjust = 0.5),
color = "black",
fontface = "bold") +
facet_wrap(~ Q4_ROLE, nrow=3) +
coord_flip() +
theme_minimal() +
theme(legend.title = element_text(size=8),
legend.key.size = unit(0.3, 'cm'),
legend.text = element_text(size = 6),
axis.title.y = element_text(vjust = +3),
legend.position="bottom") +
scale_fill_manual(name="Response:",
values=c("#C0392B","#F5B7B1","#E5E7E9", "#85C1E9", "#2874A6"),
breaks = c("1", "2", "5", "4", "3"),
labels=c("Strongly Disagree", "Disagree", "Neither Agree/Disagree", "Agree", "Strongly Agree")) +
xlab("") +
ylab("") +
ggtitle("Test")
Created on 2022-08-28 with reprex v2.0.2
Getting an error message that reads ' Error: All unnamed arguments must be length 1'. I am trying to differentiate my line by the shape of the point and I keep getting the errors above. I want to use all solid lines but differentiated by shape. The journal I want to public my work requires black and white instead of color
The plot I want to modify:
tgc <- structure(
list(
GROUP = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L),
.Label = c("LLL", "LRL", "RLR", "RRR"),
class = "factor"
),
condition = structure(
c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L),
.Label = c("Midline", "No Midline crossing", "Midline crossing"),
class = "factor"
),
names = structure(
c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L),
.Label = c("102", "104"),
class = "factor"
),
Trial_type = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("retention", "transfer"),
class = "factor"
),
Training = structure(
c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("right", "left"),
class = "factor"
),
N = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8),
NormalizedJerk = c(2060.1571177375, 1092.701687475,
2981.812368875, 1508.28547575, 2089.925186675, 1269.6704558875,
1391.97364445, 914.38364425, 3900.4119165625, 2335.7186722875,
4015.516784, 2563.16723475, 1411.2016892375, 847.952527975,
1853.413925525, 1394.847246875, 6830.81906275, 3915.019566375,
2310.5893110125, 1023.1702538875, 1340.4653657625, 765.2752940875,
5617.967587, 1705.629421875
),
sd = c(1507.9737264907, 946.522319997832,
1403.37144167604, 813.034284948276, 1383.95826055979, 915.414811222361,
580.358119711544, 361.141583497209, 3283.59638643298, 1471.60790379469,
1178.5979495842, 806.56121914288, 701.001824354642, 324.415375522303,
2103.42765701483, 1208.14874080532, 4671.80701687463, 2861.85154237875,
2130.92970547315, 813.649686635084, 489.544827982279, 256.250905151245,
5305.91963495324, 878.475841087918
),
se = c(533.14922392636,
334.646175507445, 496.166731466336, 287.451028112041, 489.303135460484,
323.648010306967, 205.187580982353, 127.682831329662, 1160.9266357632,
520.291964010475, 416.697301221774, 285.16245374901, 247.841571812654,
114.6981559765, 743.673980005258, 427.145083652716, 1651.73321101347,
1011.8173161826, 753.397422485958, 287.668605464989, 173.080233780536,
90.5983763588182, 1875.92587715314, 310.588112170911
),
ci = c(1260.69758461414,
791.31246230768, 1173.24788605882, 679.713672219011, 1157.01806049025,
765.305934151606, 485.191530116531, 301.92191947415, 2745.15527724569,
1230.29499600801, 985.332544042271, 674.302053778614, 586.052191261077,
271.218041235005, 1758.50952839896, 1010.03762375384, 3905.72840792368,
2392.56776402689, 1781.50181629779, 680.228160904389, 409.269718268804,
214.231117892992, 4435.85982330679, 734.424182295757
)
),
row.names = c(NA, -24L),
class = "data.frame"
)
tgc <- summarySE(
data10,
measurevar = "NormalizedJerk",
groupvars = c("GROUP", "condition", "names","Trial_type", "Training")
)
pd <- position_dodge2(0.2)
p <-ggplot(
gc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(
aes(ymin = NormalizedJerk - se, ymax = NormalizedJerk + se),
width = .3,
position = pd
) +
geom_line(
position = pd,
size = 1
) +
geom_point(
aes(shape = Training),
position = pd,
size = 1
)+
scale_colour_manual(
name = "Experimental group",
values = c("#999999","#999999","#000000","#000000")
labels = c("RRR","LLL","LRL","RLR")
) +
scale_shape_manual(
name = "Experimental group",
values = c("19","18","19","18")
) +
scale_linetype_manual(
name = "Experimental group",
values = c("solid","solid","solid","solid"),
labels = c("RRR","LLL","LRL","RLR")
) +
theme_bw() +
facet_wrap(.~condition) +
theme(
axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold")
) +
xlab("Block of trials") +
ylab("Normalized Jerk")
p +
scale_x_discrete(
breaks=c("102","104"),
labels=c("Pretest","Posttest")
)
I modified your code a little. Thanks to Nic3500 for formatting the code.
You needed to add the same labels to the shape manual override as the others and turn the error bar legend off. I also increased the size of the legend because the default seems too small when showing different line types.
ggplot(tgc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(aes(ymin=NormalizedJerk-se, ymax=NormalizedJerk+se),
show.legend=FALSE, # <- here
width=.3, position=pd) +
geom_line(position=pd, size = 1) +
geom_point(position=pd, size= 2) +
scale_colour_manual(name = "Experimental group",
values=c("#999999","#999999","#000000","#000000"),
labels=c("RRR","LLL","LRL","RLR")) +
scale_shape_manual(name = "Experimental group", values=c(19,18,19,18),
labels=c("RRR","LLL","LRL","RLR")) + # <- here
scale_linetype_manual(name = "Experimental group",
values=c("solid","dashed","solid","dashed"),
labels=c("RRR","LLL","LRL","RLR")) +
theme_bw()+
facet_wrap(.~condition) +
theme(axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold"),
legend.key.width = grid::unit(1.25, "cm")) + # <- here
xlab("Block of trials") + ylab("Normalized Jerk")
I need some help with stat_compare_means and multiple groups.
Here is what my data look like.
> head(df_annot)
Row.names Diversity_sh Diversity_si Evenness Chao1 Location Bean Fungi Insect
1 R-B1 1.314181 0.6040213 0.3053349 91.00000 Root Bean M- NI
2 R-B2 1.323718 0.6117602 0.3075507 77.43750 Root Bean M- NI
3 R-B3 1.249950 0.5737293 0.2877545 81.50000 Root Bean M- NI
4 R-BF-1 1.177111 0.5414276 0.2693958 92.33333 Root Bean M+ NI
5 R-BF-2 1.191254 0.5252688 0.2742420 79.54545 Root Bean M+ NI
6 R-BF-3 1.397233 0.6285945 0.3179540 85.50000 Root Bean M+ NI
Here is a graph and I would like ALL comparisons labelled.
Here is some code. I know that I don't have my_comparisons correct, but I don't know where to start for the two groups. I want to compare M+/Insect to M-/Insect and M+/Insect to M+/NI etc.., all two-way comparisons. Any suggestions would be great. thanks
my_comparisons<- list( c("M+", "M-"), c("Insect", "NI"))
ggplot(df_annot,aes_string(x="Insect",y=index,fill="Fungi"))+
geom_boxplot(alpha=0.8)+
geom_point(aes(fill=Fungi),size = 3, shape = 21,position = position_jitterdodge(jitter.width = 0.02,jitter.height = 0))+
stat_compare_means(comparison=my_comparisons,label="p.format",method="wilcox.test")+
#ggtitle(df_name)+
ylab(paste(index))+
xlab("")+
# scale_x_discrete(labels= c("M+","M-","soil alone"))+
theme(plot.title = element_text(size = 18, face = "bold"))+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
theme(legend.text=element_text(size=14),
legend.title=element_text(size=14)) +
theme(strip.text.x = element_text(size = 14))
dput(df_annot)
structure(list(Row.names = structure(c("R-B1", "R-B2", "R-B3",
"R-BF-1", "R-BF-2", "R-BF-3", "R-BFi-1", "R-BFi-2", "R-Bi-1",
"R-Bi-2", "R-Bi-3"), class = "AsIs"), Diversity_sh = c(1.31418133185869,
1.32371839350534, 1.24994951615418, 1.17711111336449, 1.19125374868316,
1.39723272927515, 1.34145146126423, 1.21674449259962, 1.20721660188555,
1.17245529262564, 1.20912937911657), Diversity_si = c(0.604021268328531,
0.611760247980402, 0.573729285531772, 0.541427625516077, 0.525268755766239,
0.628594506768001, 0.597250229879166, 0.554646956896473, 0.548992316400345,
0.531291238688503, 0.583806537719818), Evenness = c(0.305334910927276,
0.307550737463383, 0.287754490536268, 0.269395848882803, 0.274241968272787,
0.317954009728278, 0.305260435164649, 0.276882141486585, 0.273949061455415,
0.269914321375221, 0.275929262855007), Chao1 = c(91, 77.4375,
81.5, 92.3333333333333, 79.5454545454545, 85.5, 87.5, 90.5454545454545,
89.3333333333333, 88.6666666666667, 88.0769230769231), Location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Root", "Rhizospheric Soil"
), class = "factor"), Bean = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "Bean", class = "factor"),
Fungi = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L), .Label = c("M+", "M-"), class = "factor"), Insect = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor")), row.names = c(NA, -11L), class = "data.frame")
facet_wrap() might help you as discussed here
ggplot(df_annot, aes(x=df_annot$Insect, y= df_annot$Evenness)) +
facet_wrap(~df_annot$Fungi)+
geom_boxplot(alpha=0.8) +
geom_point()+
stat_compare_means(comparisons = list(c("Insect", "NI") ), label="p.format",method="wilcox.test")
EDIT
ok here is a - not too elegant - solution without faceting.
Create a new variable containing Insect info and Fungi status:
df_annot$var <- paste(df_annot$Insect,df_annot$Fungi, sep = "_" )
Then build the contrasts
my_comparisons <- rev(list(c("Insect_M-","Insect_M+"),c("NI_M-","Insect_M-"),c("NI_M+","Insect_M-"),
c("Insect_M+", "NI_M-"), c("Insect_M+", "NI_M+"), c("NI_M-","NI_M+")))
and plot your graph
ggplot(df_annot,aes_string(x="var",y="Evenness",fill="Fungi"))+
geom_boxplot(alpha=0.8)+
geom_point(aes(fill=Fungi),size = 3, shape = 21,position = position_jitterdodge(jitter.width = 0.02,jitter.height = 0))+
stat_compare_means(comparison=my_comparisons,label="p.format",method="wilcox.test")+
#ggtitle(df_name)+
ylab(paste("Evenness"))+
xlab("")+
# scale_x_discrete(labels= c("M+","M-","soil alone"))+
theme(plot.title = element_text(size = 18, face = "bold"))+
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
theme(legend.text=element_text(size=14),
legend.title=element_text(size=14)) +
theme(strip.text.x = element_text(size = 14))
you might want to make better names and such. but this could be what you are looking for.
I've created a plot which shows the means of two groups and associated 95% confidence band, as below. For the plot, I've already used different line types, fillings, colors.
The data plot_band is as follows.
dput(plot_band)
structure(list(mean = c(0.0909296772008702, 0.0949102886382386,
0.0989192140983566, 0.102428753920507, 0.106190021551613, 0.109834234007574,
0.11282406874623, 0.116443987192088, 0.119646042014149, 0.122877131667032,
0.125734341129646, 0.129194412319665, 0.131921946416482, 0.13467000293138,
0.137801823091921, 0.140320771073742, 0.143300871011905, 0.145703574224808,
0.148502607395268, 0.151216269559201, 0.153957673466713, 0.15642722394871,
0.159399752204122, 0.16158535629103, 0.163992551285173, 0.166446319141126,
0.168796463238069, 0.17130024918415, 0.17319290052143, 0.175970079857704,
0.178037138778032, 0.180359643729028, 0.182563083353043, 0.184882067722455,
0.186933337196788, 0.18928611634363, 0.19095095692481, 0.193552969255731,
0.195137836881874, 0.197581990963152, 0.199824696342001, 0.201576167030431,
0.203292777876833, 0.205785273925517, 0.207611128924057, 0.209067294675698,
0.211624327477106, 0.213018027996152, 0.215073900329166, 0.21654896049152,
0.218432328738047, 0.220299232072702, 0.221520169903876, 0.224082916931098,
0.225373663731495, 0.227623092060467, 0.228971037740905, 0.230665903341562,
0.232255049713341, 0.233816039663021, 0.236156033603955, 0.237722706454038,
0.239326639984125, 0.241061288510212, 0.323782287073584, 0.325539303794681,
0.326575563604555, 0.327932235745535, 0.329326904419804, 0.330270965006864,
0.331794972975829, 0.332736401387824, 0.333736983920265, 0.334858878358806,
0.335995344145518, 0.336884010919713, 0.337760950823761, 0.338470035342276,
0.339694375762279, 0.340590586642847, 0.340934410282471, 0.342186505998774,
0.342699699846757, 0.343822718137376, 0.344352069575663, 0.345191547743302,
0.345986783878912, 0.346908459064914, 0.347636673707646, 0.3483601957891,
0.349017016236978, 0.349393026672962, 0.350215046428817, 0.350578051082168,
0.351357872622786, 0.351833990930714, 0.352451422717008, 0.352852417773313,
0.353786047124291, 0.354360144310735, 0.354804607588953, 0.355216156665893,
0.3556114518015, 0.356570758245453, 0.357097049535425, 0.357671243406622,
0.35787930232607, 0.358500009058086, 0.359107586207553, 0.359418346394681,
0.359923090516015, 0.360327770652831, 0.360646653761867, 0.361526704703965,
0.361860340596181, 0.362284616802613, 0.362408547406209, 0.363068975461424,
0.363173638916247, 0.363746165222553, 0.364318465554143, 0.364550369183249,
0.365263491228022, 0.365588246738469, 0.366124420845147, 0.366327320718437,
0.366730809501062, 0.367298014408034), p2.5 = c(0.00920236578162877,
0.0111305911426958, 0.0131257550019632, 0.015586474005665, 0.017588259827762,
0.0195835240844649, 0.021653464115484, 0.0245221378289171, 0.0263028370478539,
0.0283125178459841, 0.030809139661692, 0.034224299031932, 0.0351514351131448,
0.0374690177003245, 0.0401208217539481, 0.0416432632702995, 0.0436268495854353,
0.0455924496480308, 0.0481710615607138, 0.0498487868097217, 0.052013860735697,
0.0541864115090449, 0.0559355297931858, 0.0582185384506931, 0.0595049507852038,
0.0617291057747846, 0.0624904066599628, 0.064090526611587, 0.0665855608482458,
0.0681610015253132, 0.0689510143842853, 0.0714235246023074, 0.0730718365551066,
0.0733828347805513, 0.0749772653575311, 0.0775677990166739, 0.0782434582066251,
0.0809696065399504, 0.0800620502625316, 0.0822097262074474, 0.0837314882447324,
0.0836800886932387, 0.0843305338836378, 0.0862036703259026, 0.0874082656018874,
0.0881312854081838, 0.0887921830279765, 0.0892805555426737, 0.0901061351380764,
0.0914750995958728, 0.0913838119125662, 0.0926827936869315, 0.0929511644196126,
0.0940218350370357, 0.0944327299872979, 0.0953545299910439, 0.0948298565703383,
0.0957001873318579, 0.0961251564147676, 0.0971098251546806, 0.0974911491380601,
0.0986598120212823, 0.0982370236835561, 0.0987719638365328, 0.114148199394403,
0.125138552629865, 0.133069438084806, 0.140931059768343, 0.147647282172844,
0.155831735418124, 0.163154010787227, 0.16809087346053, 0.173413948644787,
0.178336300631342, 0.183561163161725, 0.189552221591194, 0.192350001446747,
0.19547327255232, 0.19824967633061, 0.202611107184988, 0.205071997319457,
0.206232495037667, 0.208471493073236, 0.209717390943683, 0.211692880593303,
0.213829033311537, 0.215383413348152, 0.216370831366554, 0.216980537940184,
0.217670415960084, 0.218147500129008, 0.219104770868165, 0.220215949003459,
0.219501167154474, 0.219635297722562, 0.220565169003312, 0.218821371303922,
0.218910618214851, 0.219518190869959, 0.219204079206471, 0.219448334243776,
0.219174641398391, 0.217619259716122, 0.217993716481521, 0.218343413130982,
0.217141573568049, 0.216438618727695, 0.215672180354215, 0.214841486865522,
0.214092486614703, 0.216084004877199, 0.213891621307228, 0.213397326450924,
0.212530621813324, 0.212650230928244, 0.211323326285971, 0.211512467761759,
0.209879967307571, 0.208388878793908, 0.209257043929222, 0.207665115418059,
0.207413292377895, 0.204980142991601, 0.206053394727878, 0.205039712521127,
0.203155679138143, 0.202289445844638, 0.201779149557556), p97.5 = c(0.240681337890249,
0.239988615023241, 0.239222274397932, 0.23882694927308, 0.239567463457127,
0.240035884370459, 0.239971640602537, 0.242348644629734, 0.244241554912481,
0.246794068956881, 0.248869825514075, 0.252843804762058, 0.254595507587193,
0.257498240756364, 0.26074636531938, 0.263991307688752, 0.268222101449506,
0.270245299020079, 0.278955701793892, 0.280366963871541, 0.286253886155709,
0.290942761721134, 0.29709853936211, 0.300641051539586, 0.307350564223005,
0.314475951046524, 0.31757563389217, 0.324250050938626, 0.326645521042049,
0.334746718583917, 0.341297900171566, 0.347056902406046, 0.352412986039391,
0.356409285744598, 0.364329251893085, 0.36882469705109, 0.373595444661095,
0.379308956442793, 0.388012909521406, 0.393418480355642, 0.399407258087214,
0.403270925317011, 0.407517084163824, 0.413742327029277, 0.42089783652825,
0.422996679448412, 0.430738094720356, 0.433915405828653, 0.438263395419797,
0.442376801773873, 0.450664409546504, 0.453854917168461, 0.455755257192578,
0.463879371708031, 0.470262095557133, 0.478816677993115, 0.478998770025097,
0.485204929246363, 0.490588733478761, 0.49747652543363, 0.498792119487052,
0.508008619470507, 0.51314092048762, 0.518568532547669, 0.579810955268174,
0.563256045407579, 0.55093710586083, 0.541241619905278, 0.532667775608687,
0.523824194956849, 0.518816497858615, 0.512618467188886, 0.506452368044292,
0.501653171003674, 0.499276681561068, 0.496002704329641, 0.494256887981196,
0.49200837587611, 0.490570113245846, 0.491077058931435, 0.487352049845066,
0.487927727831147, 0.487928022062059, 0.488900063808496, 0.488866145012628,
0.489808465409391, 0.491100206396406, 0.492044173457154, 0.494346147046575,
0.494980820850837, 0.49616843086841, 0.497216550345458, 0.499201695431901,
0.501160614633382, 0.502598288902507, 0.504203085629905, 0.50530488873578,
0.508449115699177, 0.508914783054669, 0.51306711977087, 0.51479783743171,
0.51648055644086, 0.518549503653961, 0.522859455223989, 0.522598786005884,
0.52736459871623, 0.527054294078792, 0.532359397607223, 0.532643025946804,
0.533817320437782, 0.535862852499484, 0.539613602346564, 0.54138065631686,
0.544340213112881, 0.545596882887723, 0.549029532028693, 0.546769636775625,
0.551728290583129, 0.552996735997194, 0.555676593069663, 0.559580922687426,
0.561700216317917, 0.562726465369815, 0.563527127546323, 0.567715046522725,
0.568850181180136, 0.56965258128659, 0.571847219713553), outcome = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("DLT",
"CB"), class = "factor"), exp_X_post = c(721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522, 721.595263503532, 794.40305777437,
865.319646465533, 933.669956578678, 999.728550839186, 1062.12810757171,
1121.92986212885, 1186.37187215809, 1246.1267376175, 1305.33376392859,
1359.36602305224, 1421.23758898206, 1472.44041133326, 1520.62395309786,
1584.09764621781, 1634.01654454251, 1685.34860459111, 1735.26374323406,
1785.87871337346, 1840.42999799797, 1888.32905203148, 1937.38674685726,
1990.74583676908, 2041.61942276328, 2083.76909363497, 2134.07414000003,
2177.97560514382, 2227.25787768033, 2269.76501622686, 2319.50659548171,
2360.78992430352, 2404.37623851091, 2449.36656617313, 2500.80748523146,
2540.71467060956, 2588.5685157055, 2630.93535458068, 2675.04099554242,
2709.53185769419, 2763.12669881888, 2807.24737149465, 2849.03542063784,
2887.16961904492, 2927.78459960731, 2973.91123171086, 3006.0197134382,
3056.06581532434, 3089.41001229951, 3132.29020081068, 3177.35838641843,
3212.66669292569, 3256.19625640177, 3284.73766167032, 3330.28770837953,
3368.28973519186, 3409.05190043795, 3449.93435443996, 3485.59367731521,
3524.70884576068, 3557.60265444533, 3615.06476720162, 3648.55074883409,
3688.13510762386, 3727.38208940522)), .Names = c("mean", "p2.5",
"p97.5", "outcome", "exp_X_post"), row.names = c("pi_A[1]", "pi_A[2]",
"pi_A[3]", "pi_A[4]", "pi_A[5]", "pi_A[6]", "pi_A[7]", "pi_A[8]",
"pi_A[9]", "pi_A[10]", "pi_A[11]", "pi_A[12]", "pi_A[13]", "pi_A[14]",
"pi_A[15]", "pi_A[16]", "pi_A[17]", "pi_A[18]", "pi_A[19]", "pi_A[20]",
"pi_A[21]", "pi_A[22]", "pi_A[23]", "pi_A[24]", "pi_A[25]", "pi_A[26]",
"pi_A[27]", "pi_A[28]", "pi_A[29]", "pi_A[30]", "pi_A[31]", "pi_A[32]",
"pi_A[33]", "pi_A[34]", "pi_A[35]", "pi_A[36]", "pi_A[37]", "pi_A[38]",
"pi_A[39]", "pi_A[40]", "pi_A[41]", "pi_A[42]", "pi_A[43]", "pi_A[44]",
"pi_A[45]", "pi_A[46]", "pi_A[47]", "pi_A[48]", "pi_A[49]", "pi_A[50]",
"pi_A[51]", "pi_A[52]", "pi_A[53]", "pi_A[54]", "pi_A[55]", "pi_A[56]",
"pi_A[57]", "pi_A[58]", "pi_A[59]", "pi_A[60]", "pi_A[61]", "pi_A[62]",
"pi_A[63]", "pi_A[64]", "qi_A[1]", "qi_A[2]", "qi_A[3]", "qi_A[4]",
"qi_A[5]", "qi_A[6]", "qi_A[7]", "qi_A[8]", "qi_A[9]", "qi_A[10]",
"qi_A[11]", "qi_A[12]", "qi_A[13]", "qi_A[14]", "qi_A[15]", "qi_A[16]",
"qi_A[17]", "qi_A[18]", "qi_A[19]", "qi_A[20]", "qi_A[21]", "qi_A[22]",
"qi_A[23]", "qi_A[24]", "qi_A[25]", "qi_A[26]", "qi_A[27]", "qi_A[28]",
"qi_A[29]", "qi_A[30]", "qi_A[31]", "qi_A[32]", "qi_A[33]", "qi_A[34]",
"qi_A[35]", "qi_A[36]", "qi_A[37]", "qi_A[38]", "qi_A[39]", "qi_A[40]",
"qi_A[41]", "qi_A[42]", "qi_A[43]", "qi_A[44]", "qi_A[45]", "qi_A[46]",
"qi_A[47]", "qi_A[48]", "qi_A[49]", "qi_A[50]", "qi_A[51]", "qi_A[52]",
"qi_A[53]", "qi_A[54]", "qi_A[55]", "qi_A[56]", "qi_A[57]", "qi_A[58]",
"qi_A[59]", "qi_A[60]", "qi_A[61]", "qi_A[62]", "qi_A[63]", "qi_A[64]"
), class = "data.frame")
Now I want to add some vertical dashed lines. I wish to use different color for each vertical line and have legend for those lines as well. The information for those vertical lines are in another data frame observed_mean:
dput(observed_mean)
structure(list(TRT = structure(1:9, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H", "I"), class = "factor"), gmcmin = c(967.117632548,
1306.76729845833, 2394.519441584, 2404.73065902857, 3047.48745766364,
2550.12866139, 1863.6505272925, 3569.57489109, 3660.40695204)), .Names = c("TRT",
"gmcmin"), row.names = c(NA, -9L), class = "data.frame")
Here is the code to generate the plot:
range <- range(plot_band$exp_X_post)
range <- c(floor(range[1]), ceiling(range[2]))
step <- floor((range[2] - range[1]) / 10)
ggplot(plot_band, aes(x = exp_X_post, y = mean,
color = outcome, linetype = outcome)) +
geom_ribbon(aes(ymin = p2.5, ymax = p97.5, linetype = NA,
fill = outcome),
alpha = 0.4) +
geom_line(size = 1.5) +
xlab("Exposure") +
ylab("Proability of CB/DLT") +
scale_x_continuous(limits = range,
breaks = seq(range[1], range[2], by = step)
) +
geom_vline(xintercept = observed_mean$gmcmin,
linetype = 'longdash') +
theme_bw() +
theme(legend.position = 'top',
plot.margin = unit(c(1, 1, 3, 1), "lines"),
legend.title = element_text(size = 15),
axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
scale_color_discrete(name = "Probability (95% CI)") +
scale_fill_discrete(name = "Probability (95% CI)") +
scale_linetype_discrete(name = "Probability (95% CI)")
Note: the last three lines are used to change the legend title from variable name outcome to "Probability (95% CI)". NOT sure whether that's the right way though.
Questions:
I wish to put the current legend to the right, then below that I'd like to put the legend for vertical lines. Could anyone give me some clues how to do that?
As shown in the plot, there are two identical (not same color though) legends on top. The one below comes out if I change the order of the factor outcome with following code. I am not sure why that happens. How could I get rid of that?
plot_band$outcome <- factor(plot_band$outcome, levels = c("DLT", "CB"))
Thanks a lot for any comments/suggestions!!
The extra legend box is showing up because of the linetype = NA in the aes() of geom_ribbon moving the linetype out of the mapping will take care of that.
For the line labeling, you can perhaps just put the labels on the plot using geom_text
Here is a full plot that does something like that (now with ggrepel to place the labels more sensibly -- can't believe I didn't start there)
# install.packages("devtools")
# devtools::install_github("slowkow/ggrepel")
library(ggrepel)
ggplot(plot_band, aes(x = exp_X_post, y = mean,
color = outcome, linetype = outcome)) +
geom_ribbon(aes(ymin = p2.5, ymax = p97.5,
fill = outcome),
alpha = 0.4
, linetype = "blank") +
geom_line(size = 1.5) +
xlab("Exposure") +
ylab("Proability of CB/DLT") +
scale_x_continuous(limits = range,
breaks = seq(range[1], range[2], by = step)
) +
geom_vline(xintercept = observed_mean$gmcmin
, linetype = 'longdash') +
geom_text_repel(
mapping = aes(
x = gmcmin
, y = 0
, label = TRT
, color = NA
, linetype = NA)
, data = observed_mean
, show.legend = FALSE) +
theme_bw() +
theme(legend.position = 'top',
plot.margin = unit(c(1, 1, 3, 1), "lines"),
legend.title = element_text(size = 15),
axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
scale_color_discrete(name = "Probability (95% CI)") +
scale_fill_discrete(name = "Probability (95% CI)") +
scale_linetype_discrete(name = "Probability (95% CI)")
(Note: the mean labels overlap, so you may need to more careful position those, e.g., by adding another column to observed_mean giving the position where you want them plotted).
If you need the labels to be in a legend instead, you can use this code:
ggplot(plot_band, aes(x = exp_X_post, y = mean,
color = outcome)) +
geom_ribbon(aes(ymin = p2.5, ymax = p97.5,
fill = outcome),
alpha = 0.4
, linetype = "blank") +
geom_line(#aes(linetype = outcome)
#,
size = 1.5
# , show.legend = FALSE
) +
xlab("Exposure") +
ylab("Proability of CB/DLT") +
scale_x_continuous(breaks = pretty(range)) +
geom_vline(
mapping = aes(xintercept = gmcmin
, linetype = TRT)
, data = observed_mean) +
theme_bw() +
theme(legend.position = 'right',
plot.margin = unit(c(1, 1, 3, 1), "lines"),
legend.title = element_text(size = 15),
axis.title.y = element_text(margin = margin(0, 15, 0, 0))) +
scale_color_discrete(name = "Probability (95% CI)") +
scale_fill_discrete(name = "Probability (95% CI)") +
scale_linetype_discrete(name = "Treatment")
Note, that I removed the linetype from the main lines, as it was causing some weirdness with the vertical line. You can add it back by uncommenting the parts in geom_line() but note that it then shows up in the list with the treatments. There is probably a way to fix that if you absolutely need it, but my quick tries aren't working. I will note, however, that the linetypes are a bit hard to pick out.
Example plot with both the legend and the labels
I'm working with ggplot2 for the first time, and I'm having trouble making the colors of the labels I created with ggrepel change dynamically. Currently, my code looks like this:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
In general, this works pretty well, except I strongly dislike the toothpaste green color it gives me for one of the two factors plotted. I want to dictate the specific colors of that fill = factor(Hemisphere)) line, but I don't know how.
I have already tried using the scale_colours_manual function, but when I include it within the geom_label_repel(.....) paratheses in line 3, the program complains that "ggplot2 doesn't know how to deal with data of class ScaleDiscrete/Scale/ggproto", and when I place the scale_colours_manual line outside of line 3, it has no effect at all, as in this example, which produced an identical plot to the one above:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
scale_colour_manual(values = c('blue', 'red')) +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
I know there has to be a way to do this, but I'm at a loss. Thanks for any help you've got!
EDIT: At request, I've attached a dput() of tstat. Not a big data frame.
structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
You can use scale_fill_manual:
tstat <- structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
library(ggplot2)
library(ggrepel)
library(ggthemes)
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere") +
scale_fill_manual(values = setNames(c("lightblue", "darkgreen"), levels(tstat$Hemisphere)))