Related
I would like to create a plot using R, preferably by using ggplot. I have the following variables to visualize, most of them binary:
Trial: cong/incong
Sentence: him/himself
Condition: normal/slow
Accuracy: number
SE: number
# dput() output for the data to plot: three two-level factors (TrialType,
# SentenceType, Condition) plus numeric MeanAccuracy and SE columns, 8 rows.
# The expression is not assigned to a name here.
structure(list(TrialType = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L), .Label = c("congruent", "incongruent"), class = "factor"),
SentenceType = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L
), .Label = c("him", "himself"), class = "factor"), Condition = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("normal_speech",
"slow_speech"), class = "factor"), MeanAccuracy = c(0.794871794871795,
0.762820512820513, 0.967948717948718, 0.967948717948718,
0.237179487179487, 0.342105263157895, 0.942307692307692,
0.83974358974359), SE = c(0.0342056016493384, 0.0430264468743046,
0.0389087806837746, 0.0496183045476835, 0.0135583881898854,
0.0163760608630386, 0.0170869868584354, 0.0311270245470197
)), class = "data.frame", row.names = c(NA, -8L))
The SE stands for the standard error, meaning that I would like to present the error bars around the accuracy score.
I figured that my best option is to make two bar plots, one for each condition, with accuracy on the y-axis. Each plot would then have four bars representing all possible combinations of sentence and trial, with the bar height showing the accuracy and error bars around it to reflect the uncertainty.
How could I make such a graph? Or, does anyone think that this is not the right type of graph and then what would be (and how to plot it...)?
Thanks in advance!
Are you perhaps looking for something like this?
library(ggplot2)
# Dodged bars of mean accuracy per trial type, filled by sentence type, with
# MeanAccuracy +/- SE error bars. One panel per Condition; the panel label
# is switched to the bottom and placed outside the axis.
ggplot(
  data = df,
  mapping = aes(x = TrialType, y = MeanAccuracy, fill = SentenceType)
) +
  geom_col(color = "gray50", position = position_dodge(width = 1)) +
  geom_errorbar(
    mapping = aes(ymin = MeanAccuracy - SE, ymax = MeanAccuracy + SE),
    position = position_dodge(width = 1),
    width = 0.25
  ) +
  facet_grid(. ~ Condition, switch = "x") +
  scale_fill_manual(values = c("gold", "deepskyblue4")) +
  theme_bw() +
  theme(
    axis.line = element_line(),
    panel.border = element_blank(),
    panel.spacing = unit(0, "points"),
    strip.background = element_blank(),
    strip.placement = "outside"
  )
Using some simulated data based on the description you shared, you can try:
library(ggplot2)
library(dplyr)
library(tidyr)
# Data
# Simulated stand-in for the question's data: one row for every combination
# of Trial x Sentence x Condition (2 x 2 x 2 = 8 rows). expand.grid() is
# used because repeating each two-level vector with the same rep() pattern
# would keep the three columns in lockstep and yield only two distinct
# combinations.
df <- expand.grid(
  Trial = c("cong", "incong"),
  Sentence = c("him", "himself"),
  Condition = c("normal", "slow"),
  stringsAsFactors = FALSE
)
# Random placeholder values, drawn from the same ranges as before.
df$Accuracy <- runif(nrow(df), 0, 1)
df$SE <- runif(nrow(df), 0, 10)
# Plot 1
# Reshape every column except the three design columns into name/value
# pairs, then draw one bar per name, filled by Condition, with a
# free-scaled panel for each Trial + Sentence combination.
df %>%
  pivot_longer(cols = -c(Trial, Sentence, Condition)) %>%
  ggplot(mapping = aes(x = name, y = value, fill = Condition)) +
  geom_bar(stat = "identity") +
  facet_wrap(. ~ Trial + Sentence, scales = "free")
Output:
Or this:
# Plot 2
# Same reshaping and bars as Plot 1, but laid out as a grid with Trial in
# rows and Sentence in columns.
df %>%
  pivot_longer(cols = -c(Trial, Sentence, Condition)) %>%
  ggplot(mapping = aes(x = name, y = value, fill = Condition)) +
  geom_bar(stat = "identity") +
  facet_grid(Trial ~ Sentence, scales = "free")
Output:
Further details and data are necessary to understand your issue.
I am trying to replicate this barplot in R with the following table as the input data.
There are many other sites, other than CAV1, this is just a small example of the data.
Site Time Abundance Group STDEV
1 CAV1 DAY0 7.15e-06 X 1.968384e-06
2 CAV1 Day1 3.39e-06 X 4.934761e-07
5 CAV2 DAY0 7.15e-07 Y 8.636959e-07
6 CAV2 Day1 3.39e-07 Y 3.511951e-07
Unfortunately, I have not yet found any code that establishes multiple values for the x-axis.
# Dodged bar chart of Abundance over Time with one fill colour per Site and
# a black bar outline.
ggplot(df, aes(x = Time, y = Abundance, fill = Site)) +
  geom_bar(
    position = position_dodge(),
    stat = "identity",
    color = "black"
  ) +
  theme_minimal()
I think some modifications to this simple plot call could help get me started in the right direction.
Any help would be very appreciated.
EDIT: This answer now addresses both the original question data and the edited question data.
You almost have it. You just need to change the fill aesthetic to Group and change your position argument:
library(ggplot2)
# Data from the original question: one site (CAV1), four time points, and
# two groups (X for the first four rows, Y for the last four).
df <- data.frame(
  Site = rep("CAV1", times = 8),
  Time = rep(c("DAY0", "DAY1", "DAY3", "DAY7"), times = 2),
  Abundance = c(
    7.15e-06, 3.39e-06, 6.04e-07, 6.39e-07,
    6.64e-06, 5.59e-06, 2.55e-06, 1.01e-06
  ),
  Group = rep(c("X", "Y"), each = 4),
  STDEV = c(
    1.968384e-06, 4.934761e-07, 2.004625e-07, 2.020505e-07,
    8.636959e-07, 3.511951e-07, 3.008267e-07, 3.01e-07
  )
)
# Side-by-side (dodged) bars: one bar per Group at each Time value.
ggplot(df, aes(x = Time, y = Abundance, fill = Group)) +
  geom_bar(
    position = "dodge",
    stat = "identity",
    color = "black"
  ) +
  theme_minimal()
This data only had one Site category, when we add more sites, we can facet by them:
# Data from the edited question: two sites with two time points each; the
# Group value changes together with the Site value.
df2 <- data.frame(
  Site = rep(c("CAV1", "CAV2"), each = 2),
  Time = rep(c("DAY0", "DAY1"), times = 2),
  Abundance = rep(c(7.15e-06, 3.39e-06), times = 2),
  Group = rep(c("X", "Y"), each = 2),
  STDEV = c(1.968384e-06, 4.934761e-07, 8.636959e-07, 3.511951e-07)
)
# Same dodged bars as before, with an extra panel per Site.
ggplot(df2, aes(x = Time, y = Abundance, fill = Group)) +
  geom_bar(
    position = "dodge",
    stat = "identity",
    color = "black"
  ) +
  facet_wrap(Site ~ .) +
  theme_minimal()
However, in the edited data the Site and Group variables are perfectly correlated. The example data and code below more closely reflect the desired output plot from your edited question:
# Example data in which both groups (X and Y) occur at both sites, each
# measured at two time points.
df3 <- data.frame(
  Site = rep(c("CAV1", "CAV2"), each = 4),
  Time = rep(c("DAY0", "DAY1"), times = 4),
  Abundance = c(
    7.15e-06, 3.39e-06, 6.04e-07, 6.39e-07,
    6.64e-06, 5.59e-06, 2.55e-06, 1.01e-06
  ),
  Group = rep(rep(c("X", "Y"), each = 2), times = 2),
  STDEV = c(
    1.968384e-06, 4.934761e-07, 2.004625e-07, 2.020505e-07,
    8.636959e-07, 3.511951e-07, 3.008267e-07, 3.01e-07
  )
)
# The plotting code is unchanged apart from the data set: dodged bars per
# Group over Time, one panel per Site.
ggplot(df3, aes(x = Time, y = Abundance, fill = Group)) +
  geom_bar(
    position = "dodge",
    stat = "identity",
    color = "black"
  ) +
  facet_wrap(Site ~ .) +
  theme_minimal()
I think you could use this:
library(ggplot2)
# dput()-style definition of the example data: Site, Time and Group are
# factors, Abundance and STDEV are numeric; 8 rows with character row names
# "1" to "8".
DF <- structure(list(Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "CAV1", class = "factor"), Time = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("DAY0", "Day1", "Day3",
"Day7"), class = "factor"), Abundance = c(7.15e-06, 3.39e-06,
6.04e-07, 6.39e-07, 6.64e-06, 5.59e-06, 2.55e-06, 1.01e-06),
Group = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("X",
"Y"), class = "factor"), STDEV = c(1.968384e-06, 4.934761e-07,
2.004625e-07, 2.020505e-07, 8.636959e-07, 3.511951e-07, 3.008267e-07,
3.01e-07)), class = "data.frame", row.names = c("1", "2",
"3", "4", "5", "6", "7", "8"))
# Plot
# Dodged bars of Abundance over Time, filled by Group; the x-axis title is
# set to the site name.
ggplot(DF, aes(x = Time, y = Abundance, fill = Group)) +
  geom_bar(
    position = position_dodge(),
    stat = "identity",
    color = "black"
  ) +
  labs(x = "CAV1") +
  theme_minimal()
I have the following data which I am trying to plot as combined bar and line plot (with CI)
A data frame of Feature, Count, Odds Ratio and Confidence Interval values for OR
I am trying to get a plot as
A bar plot for count over lapped with a line plot for Odds Ratio with CI bars
I tried to plot in ggplot2 using following code:
# Asker's attempt: bars show Count; the line and the points show OR
# multiplied by max(Count).
ggplot(feat)+
geom_bar(aes(x=Feat, y=Count),stat="identity", fill = "steelblue") +
geom_line(aes(x=Feat, y=OR*max(feat$Count)),stat="identity", group = 1) +
geom_point(aes(x=Feat, y=OR*max(feat$Count))) +
# The error-bar limits use CI1 and CI2 as they are, without the max(Count)
# factor that is applied to OR in the two layers above.
geom_errorbar(aes(x=Feat, ymin=CI1, ymax=CI2), width=.1, colour="orange",
position = position_dodge(0.05))
However, I am not getting the CI bars for the line graph, as can be seen in pic: Rather, I am getting them for barplot
Can someone please help me sort out this issue?
Thanks
Edit - Dput:
# dput() of the question data, assigned to `df`. Every column, including
# Count, OR, CI1 and CI2, is stored as a factor whose labels are the numbers
# written as strings, so the factor codes are not the numeric values.
df <- structure(list(Feat = structure(1:8, .Label = c("A", "B", "C",
"D", "E", "F", "G", "H"), class = "factor"), Count = structure(c(2L,
8L, 7L, 5L, 4L, 1L, 6L, 3L), .Label = c("13", "145", "2", "25",
"26", "3", "37", "43"), class = "factor"), OR = structure(c(4L,
2L, 1L, 5L, 3L, 7L, 6L, 8L), .Label = c("0.38", "1.24", "1.33",
"1.51", "1.91", "2.08", "2.27", "3.58"), class = "factor"), CI1 = structure(c(7L,
4L, 1L, 6L, 3L, 5L, 2L, 2L), .Label = c("0.26", "0.43", "0.85",
"0.89", "1.2", "1.24", "1.25"), class = "factor"), CI2 = structure(c(3L,
2L, 1L, 6L, 4L, 7L, 8L, 5L), .Label = c("0.53", "1.7", "1.82",
"1.98", "13.07", "2.83", "3.92", "6.13"), class = "factor")), class = "data.frame", row.names = c(NA,
-8L))
Is this what you had in mind?
# The posted dput() stores Count, OR, CI1 and CI2 as factors, on which max()
# and arithmetic fail. Convert through as.character() so that the labels
# (not the integer codes) become the numeric values; columns that are
# already numeric pass through unchanged.
num_cols <- c("Count", "OR", "CI1", "CI2")
feat[num_cols] <- lapply(
  feat[num_cols],
  function(col) as.numeric(as.character(col))
)

# Scaling factor that stretches the odds-ratio scale so that the largest
# upper CI limit lines up with the largest count.
ratio <- max(feat$Count) / max(feat$CI2)

# Bars show Count on the primary axis. OR and both CI limits are multiplied
# by `ratio` so line, points and error bars share one scale; the secondary
# axis divides by `ratio` again to display odds-ratio units.
ggplot(feat) +
  geom_bar(aes(x = Feat, y = Count), stat = "identity", fill = "steelblue") +
  geom_line(aes(x = Feat, y = OR * ratio), stat = "identity", group = 1) +
  geom_point(aes(x = Feat, y = OR * ratio)) +
  geom_errorbar(
    aes(x = Feat, ymin = CI1 * ratio, ymax = CI2 * ratio),
    width = .1, colour = "orange",
    position = position_dodge(0.05)
  ) +
  scale_y_continuous(
    "Count",
    sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio")
  )
Edit: Just for fun with the legend too.
# Same dual-axis plot, with legend entries created by mapping constant
# labels ("Count", "Odds Ratio") to fill and colour. All geoms come first,
# followed by the scales and the legend styling.
ggplot(feat) +
  geom_bar(aes(x = Feat, y = Count, fill = "Count"), stat = "identity") +
  geom_line(
    aes(x = Feat, y = OR * ratio, color = "Odds Ratio"),
    stat = "identity", group = 1
  ) +
  geom_point(aes(x = Feat, y = OR * ratio)) +
  geom_errorbar(
    aes(x = Feat, ymin = CI1 * ratio, ymax = CI2 * ratio),
    position = position_dodge(0.05),
    colour = "orange", width = .1
  ) +
  scale_fill_manual(values = "steelblue") +
  scale_color_manual(values = "orange") +
  scale_y_continuous(
    "Count",
    sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio")
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.key = element_blank()
  )
Since you asked about adding p values for comparisons in the comments, here is a way you can do that. Unfortunately, because you don't really want to add *all* the comparisons, there's a little bit of hard coding to do.
library(ggplot2)
library(ggsignif)
# Dual-axis plot with legend plus hand-picked significance brackets: the
# compared pairs, bracket heights and annotation text are all hard-coded.
ggplot(feat, aes(x = Feat, y = Count)) +
  geom_bar(aes(fill = "Count"), stat = "identity") +
  geom_line(
    aes(x = Feat, y = OR * ratio, color = "Odds Ratio"),
    stat = "identity", group = 1
  ) +
  geom_point(aes(x = Feat, y = OR * ratio)) +
  geom_errorbar(
    aes(x = Feat, ymin = CI1 * ratio, ymax = CI2 * ratio),
    position = position_dodge(0.05),
    colour = "orange", width = .1
  ) +
  geom_signif(
    comparisons = list(c("A", "H"), c("B", "F"), c("D", "E")),
    y_position = c(150, 60, 40),
    annotation = c("***", "***", "n.s.")
  ) +
  scale_fill_manual(values = "steelblue") +
  scale_color_manual(values = "orange") +
  scale_y_continuous(
    "Count",
    sec.axis = sec_axis(~ . / ratio, name = "Odds Ratio")
  ) +
  theme(
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.key = element_blank()
  )
Using the data.frame below, I want to have a bar plot with y axis log transformed.
I got this plot
using this code
# Dodged bars of ymean per id, one bar per level of var, with ymin/ymax
# error bars dodged by the same 0.7 width as the bars.
ggplot(df, aes(x = id, y = ymean, fill = var, group = var)) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    size = .9,
    width = 0.7
  ) +
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    position = position_dodge(.7),
    width = .07,
    size = .25
  ) +
  theme_bw()
to log transform y axis to show the "low" level in B and D which is close to zero, I used
+scale_y_log10()
which resulted in
Any suggestions how to transform y axis of the first plot?
By the way, some values in my data are close to zero, but none of them is zero.
UPDATE
Trying this suggested answer by @computermacgyver
# Asker's second attempt: the same dodged bars and error bars as before, now
# on a log10 y-axis with breaks at powers of ten.
ggplot(df, aes(x=id, y=ymean , fill=var, group=var)) +
geom_bar(position="dodge", stat="identity",
width = 0.7,
size=.9)+
# NOTE(review): trans_breaks(), trans_format() and math_format() are not
# defined in the visible code; presumably they come from the scales
# package, which would then need to be attached. TODO confirm.
scale_y_log10("y",
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)))+
geom_errorbar(aes(ymin=ymin,ymax=ymax),
size=.25,
width=.07,
position=position_dodge(.7))+
theme_bw()
I got
DATA
# dput() call followed by its pasted output: a 21-row tibble with factor
# columns id (levels A-G) and var (levels high, medium, low) and numeric
# columns ymin, ymax and ymean.
dput(df)
structure(list(id = structure(c(7L, 7L, 7L, 1L, 1L, 1L, 2L, 2L,
2L, 6L, 6L, 6L, 5L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D", "E", "F", "G"), class = "factor"), var = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L), .Label = c("high", "medium", "low"), class = "factor"),
ymin = c(0.189863418, 0.19131948, 0.117720496, 0.255852069,
0.139624146, 0.048182771, 0.056593774, 0.037262727, 0.001156667,
0.024461299, 0.026203592, 0.031913077, 0.040168571, 0.035235902,
0.019156667, 0.04172913, 0.03591233, 0.026405094, 0.019256055,
0.011310755, 0.000412414), ymax = c(0.268973856, 0.219709677,
0.158936508, 0.343307692, 0.205225352, 0.068857143, 0.06059596,
0.047296296, 0.002559633, 0.032446541, 0.029476821, 0.0394,
0.048959184, 0.046833333, 0.047666667, 0.044269231, 0.051,
0.029181818, 0.03052381, 0.026892857, 0.001511628), ymean = c(0.231733739333333,
0.204891473333333, 0.140787890333333, 0.295301559666667,
0.173604191666667, 0.057967681, 0.058076578, 0.043017856,
0.00141152033333333, 0.0274970166666667, 0.0273799226666667,
0.0357511486666667, 0.0442377366666667, 0.0409452846666667,
0.0298284603333333, 0.042549019, 0.0407020586666667, 0.0272998796666667,
0.023900407, 0.016336106, 0.000488014)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L), .Names = c("id",
"var", "ymin", "ymax", "ymean"))
As @Miff has written, bars are generally not useful on a log scale. With barplots, we compare the height of the bars to one another. To do this, we need a fixed point from which to compare, usually 0, but log(0) is negative infinity.
So, I would strongly suggest that you consider using geom_point() instead of geom_bar(). I.e.,
# Points with error bars on a log10 y-axis, dodged by var within each id.
# The break and label helpers are called as scales::... so the snippet runs
# with only ggplot2 attached.
ggplot(df, aes(x = id, y = ymean, color = var)) +
  geom_point(position = position_dodge(.7)) +
  scale_y_log10(
    "y",
    # Breaks at integer powers of ten, labelled as 10^n.
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    size = .25,
    width = .07,
    position = position_dodge(.7)
  ) +
  theme_bw()
If you really, really want bars, then you should use geom_rect instead of geom_bar and set your own baseline. That is, the baseline for geom_bar is zero but you will have to invent a new baseline in a log scale. Your Plot 1 seems to use 10^-7.
This can be accomplished with the following, but again, I consider this a really bad idea.
# Bars on a log10 axis drawn as rectangles from a hand-picked baseline up to
# ymean. The baseline is written as 1e-7 (i.e. 10^-7); the previous literal
# 10E-7 evaluates to 1e-6. The break and label helpers are called as
# scales::... so the snippet runs with only ggplot2 attached.
ggplot(
  df,
  aes(
    xmin = as.numeric(id) - .4, xmax = as.numeric(id) + .4, x = id,
    ymin = 1e-7, ymax = ymean, fill = var
  )
) +
  geom_rect(position = position_dodge(.8)) +
  scale_y_log10(
    "y",
    # Breaks at integer powers of ten, labelled as 10^n.
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # The error bars override the rectangle limits with the data's own
  # ymin/ymax columns.
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    size = .25,
    width = .07,
    position = position_dodge(.8)
  ) +
  theme_bw()
If you need bars flipped, maybe calculate your own log10(y), see example:
library(ggplot2)
library(dplyr)
# Pre-compute -log10() of the three value columns so that an ordinary
# linear axis can be used and small values give tall bars.
dfPlot <- df %>%
  mutate(
    ymin = -log10(ymin),
    ymax = -log10(ymax),
    ymean = -log10(ymean)
  )

# Plot the transformed values as dodged bars with error bars; the axis title
# spells out the transformation.
ggplot(dfPlot, aes(x = id, y = ymean, fill = var, group = var)) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    size = 0.9,
    width = 0.7
  ) +
  geom_errorbar(
    aes(ymin = ymin, ymax = ymax),
    position = position_dodge(0.7),
    width = 0.07,
    size = 0.25
  ) +
  scale_y_continuous(name = expression(-log[10](italic(ymean)))) +
  theme_bw()
Firstly, don't do it! The help file from ?geom_bar says:
A bar chart uses height to represent a value, and so the base of the
bar must always be shown to produce a valid visual comparison. Naomi
Robbins has a nice article on this topic. This is why it doesn't make
sense to use a log-scaled y axis with a bar chart.
To give a concrete example, the following is a way of producing the graph you want, but a larger k will also be correct but produce a different plot visually.
# Multiply every y value by k before applying the log10 axis, and divide the
# axis labels by k again so they show the original units.
k <- 10000
ggplot(df, aes(x = id, y = ymean * k, fill = var, group = var)) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    size = .9,
    width = 0.7
  ) +
  geom_errorbar(
    aes(ymin = ymin * k, ymax = ymax * k),
    position = position_dodge(.7),
    width = .07,
    size = .25
  ) +
  scale_y_log10(labels = function(brk) brk / k) +
  theme_bw()
k=1e4
k=1e6
I have the need to place labels above bars on ggplot. I used to use the method found (HERE) but this does not appear to work anymore since my ggplot2 update as I now get the error message:
Error in continuous_scale(c("y", "ymin", "ymax", "yend", "yintercept", :
unused argument(s) (formatter = "percent")
How can I again plot numeric values above the bars when using the example:
# Example data: factors A and B share the same three range labels, and Freq
# is a numeric proportion for each of the 9 A/B pairs.
df <- structure(list(A = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L,
3L), .Label = c("0-50,000", "50,001-250,000", "250,001-Over"), class = "factor"),
B = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("0-50,000",
"50,001-250,000", "250,001-Over"), class = "factor"), Freq = c(0.507713884992987,
0.258064516129032, 0.23422159887798, 0.168539325842697, 0.525280898876405,
0.306179775280899, 0.160958904109589, 0.243150684931507,
0.595890410958904)), .Names = c("A", "B", "Freq"), class = "data.frame", row.names = c(NA,
-9L))
library(ggplot2)
# Asker's code: dodged bars of Freq by A, filled by B, with a percentage
# label for each bar.
ggplot(data=df, aes(x=A, y=Freq))+
geom_bar(aes(fill=B), position = position_dodge()) +
# The label text is Freq as a percentage with one decimal; its y position is
# Freq + 0.015.
geom_text(aes(label = paste(sprintf("%.1f", Freq*100), "%", sep=""),
y = Freq+0.015, x=A),
size = 3, position = position_dodge(width=0.9)) +
# The `formatter` argument below is the one named in the quoted
# "unused argument(s)" error.
scale_y_continuous(formatter = "percent") +
theme_bw()
Running R 2.15 ggplot2 0.9 on a win 7 machine
The error is from the scale_y_continuous call. Formatting of labels is now handled by the labels argument. See the ggplot2 0.9.0 transition guide for more details.
There was another problem with the labels not lining up correctly; I fixed that by adding a group=B to the aesthetics for the geom_text; I'm not quite sure why this is necessary, though. I also took out x=A from the geom_text aesthetics because it was not needed (it would be inherited from the ggplot call).
library("ggplot2")
library("scales")
# Dodged bars of Freq by A, filled by B, with a percentage label above each
# bar and a percent-formatted y-axis.
ggplot(data = df, aes(x = A, y = Freq)) +
  # stat = "identity" makes the bar height come from Freq; without it
  # geom_bar() counts rows and does not accept a y aesthetic.
  geom_bar(aes(fill = B), stat = "identity", position = position_dodge()) +
  # group = B gives the text layer the same dodge grouping as the bars
  # (the bars get it from fill); y is nudged up by 0.015.
  geom_text(
    aes(
      label = sprintf("%.1f%%", Freq * 100),
      y = Freq + 0.015, group = B
    ),
    size = 3, position = position_dodge(width = 0.9)
  ) +
  scale_y_continuous(labels = percent) +
  theme_bw()