Add text to plot with facetted bar chart - r

My question is related to this question. I want "2014" in the 4-year facet. I tried to repeat but my code doesn't give what I want.
Annotating text on individual facet in ggplot2
This is my data
structure(list(Rot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2-year",
"3-year", "4-year"), class = "factor"), Rot.Herb = structure(c(3L,
3L, 4L, 4L, 13L, 13L, 14L, 14L, 5L, 5L, 6L, 6L, 9L, 9L, 10L,
10L, 15L, 15L, 16L, 16L, 1L, 1L, 2L, 2L, 7L, 7L, 8L, 8L, 11L,
11L, 12L, 12L, 17L, 17L, 18L, 18L), .Label = c("A4-conv", "A4-low",
"C2-conv", "C2-low", "C3-conv", "C3-low", "C4-conv", "C4-low",
"O3-conv", "O3-low", "O4-conv", "O4-low", "S2-conv", "S2-low",
"S3-conv", "S3-low", "S4-conv", "S4-low"), class = "factor"),
variable = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Diversity",
"Evenness"), class = "factor"), N = c(4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(0.78537789925, 0.613408315,
1.305194686, 0.79519430975, 0.4481728555, 0.30608817425,
1.20978861475, 0.8580643725, 0.92387324875, 0.630166121,
0.945954185, 0.561172324, 1.43952456275, 0.8616864655, 1.23679146725,
0.831737624, 1.033474108, 0.80689293925, 0.9910142125, 0.79342098075,
1.175512223, 0.6293940245, 0.981614832, 0.62342189825, 1.351710013,
0.805075937, 1.6598348325, 0.7983622545, 1.01606920875, 0.5751418795,
1.0500365255, 0.56408326225, 1.07162937725, 0.6756859865,
0.45699816625, 0.44444147325), sd = c(0.354077266902404,
0.208934910331856, 0.169501822767995, 0.0774319459391732,
0.737366460962239, 0.40697977697835, 0.494107033311986, 0.11906912863268,
0.491492768082854, 0.34236657107712, 0.219739438843007, 0.205905593411204,
0.319301583035043, 0.0696484379979274, 0.0563293598951725,
0.0978700910274188, 0.446850757364563, 0.175073468716825,
0.426859848850874, 0.180469101499932, 0.526842123835502,
0.200470277385505, 0.574885944755375, 0.27189545397305, 0.39621771945215,
0.150798258847229, 0.275863362594154, 0.111178397407429,
0.254811233135664, 0.158920851982914, 0.198698241334475,
0.0730606635175717, 0.717706309307313, 0.453776579066358,
0.574276936403411, 0.513758415496589), se = c(0.177038633451202,
0.104467455165928, 0.0847509113839974, 0.0387159729695866,
0.368683230481119, 0.203489888489175, 0.247053516655993,
0.0595345643163399, 0.245746384041427, 0.17118328553856,
0.109869719421504, 0.102952796705602, 0.159650791517521,
0.0348242189989637, 0.0281646799475863, 0.0489350455137094,
0.223425378682282, 0.0875367343584126, 0.213429924425437,
0.090234550749966, 0.263421061917751, 0.100235138692753,
0.287442972377688, 0.135947726986525, 0.198108859726075,
0.0753991294236146, 0.137931681297077, 0.0555891987037145,
0.127405616567832, 0.0794604259914568, 0.0993491206672376,
0.0365303317587859, 0.358853154653656, 0.226888289533179,
0.287138468201705, 0.256879207748294), ci = c(0.563415944919255,
0.332462066715199, 0.26971522480343, 0.123211505132525, 1.1733145846647,
0.647595643784969, 0.786234551289211, 0.189465554245211,
0.782074671929471, 0.544781614588516, 0.349654482635521,
0.327641747494367, 0.508080071600555, 0.110826207087643,
0.089632581638694, 0.155733154793995, 0.71103927089404, 0.278580956835532,
0.679229274424713, 0.287166612643164, 0.838323385234058,
0.318992946792351, 0.914771825423139, 0.432646341459985,
0.630470808679215, 0.23995368085579, 0.438960169525453, 0.176909640028318,
0.40546153371869, 0.252878539112781, 0.316173242000635, 0.116255819336536,
1.14203089616693, 0.722059798737006, 0.91380275723334, 0.817504285602766
)), .Names = c("Rot", "Rot.Herb", "variable", "N", "value",
"sd", "se", "ci"), row.names = c(NA, -36L), class = "data.frame")
and the code to graph
p <- ggplot(Shannon.long2, aes(x=Rot.Herb, y=value, fill=factor(variable)))+
geom_bar(stat="identity", position="dodge")+
scale_fill_brewer(palette = "Set1")+
theme_bw() +
theme(panel.grid.major=element_blank()) +
facet_grid(~Rot, scales = "free_x", space="free_x")+
theme(legend.title=element_blank(),legend.text=element_text(size=20),legend.position="top")+
geom_errorbar(aes(ymin=value-se, ymax=value+se), size=0.5, width=.25,position=position_dodge(.9))+
xlab("\nTreatment") +
theme(axis.title = element_text(size=24,face="bold", vjust=4), axis.text.x = element_text(size=20,angle = 90, hjust = 1)) +
ylab("Shannon's H' and E'") +
theme(axis.title = element_text(size=24,face="bold", vjust=2), axis.text.y = element_text(size=20, color="black"))+
theme(strip.text.x = element_text(colour = "black", size = 20), strip.background = element_rect(fill = "white"))
produced graph (please don't mind the "2014" on the y-axis).
New code to annotate 2014, with help from eipi10
ann_text <- data.frame(x = "S4-conv",y = 1.75,lab = "2014", Rot.Herb=NA,
value=NA, variable=NA,
N=NA, sd=NA, se=NA, ci=NA,
Rot = factor("4-year",levels = c("2-year","3-year","4-year")))
I got the error "Error: Discrete value supplied to continuous scale" after running p + geom_text(data = ann_text,label = "2014"). Please help me see what is wrong with my code and data format. Thanks.

It turns out the issue is that when you include value=NA in ann_text it gets interpreted as logical (rather than numeric, which is its mode in Shannon.long2), causing the error because ggplot expects a numeric variable rather than a categorical one. Set value=NA_real_ (in addition to NA, R has class-specific missing value constants; see ?NA for more info) in ann_text to ensure value is interpreted as numeric and resolve the error. Or set value to any number, e.g., value=0.
In the example below, I've removed all of the theme and lab statements to shorten the code down to the essentials:
# Base plot: dodged bars with +/- 1 SE error bars, one panel per rotation (Rot).
p <- ggplot(Shannon.long2, aes(x = Rot.Herb, y = value, fill = factor(variable))) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                size = 0.5, width = .25, position = position_dodge(.9)) +
  facet_grid(~Rot, scales = "free_x", space = "free_x")

# One-row data frame describing the label.
# - value is NA_real_ (not plain NA, which is logical) so it stays numeric.
# - Rot carries the facetting variable: without it facet_grid() repeats the
#   layer in every panel; with it the label is drawn in the "4-year" panel only.
ann_text <- data.frame(
  x = "S4-conv", y = 1.75, lab = "2014",
  Rot.Herb = NA, value = NA_real_, variable = NA,
  Rot = factor("4-year", levels = c("2-year", "3-year", "4-year"))
)

# x and y are supplied explicitly to position the label.
p + geom_text(data = ann_text, aes(label = lab, x, y))
Note that you also need to feed x and y values to geom_text to provide the label location.
Another option would be to just use the same x and y variable names as in your original data frame, since ggplot already knows these names and has scaled the graph based on them. Now the only missing column we need to add is variable:
# Reuse the plot's own x/y column names (Rot.Herb, value) for the label position.
# variable is needed because the plot maps fill = factor(variable); Rot is
# needed so that facet_grid(~Rot) draws the label in the "4-year" panel only
# instead of repeating it in every panel.
ann_text <- data.frame(
  Rot.Herb = "S4-conv", value = 1.75, lab = "2014", variable = NA,
  Rot = factor("4-year", levels = c("2-year", "3-year", "4-year"))
)
p + geom_text(data = ann_text, aes(label = lab, Rot.Herb, value))

Related

How to prevent R from alphabetically ranking data in ggplot and specify the order in which data is plotted (Data + Code + Graphs provided)?

I'm trying to fix an issue with my GGBalloonPlot graph with regards to how R processes the axis labels.
By default R plots the data using the labels ranked in reverse alphabetical order, but to reveal the pattern of the data, the data need to be plotted in a specific order. The only way I've been able to trick the software is by manually adding a prefix to each label in my .csv table so that R would rank them properly in my output. This is time consuming since I need to manually order the data first before adding the prefix and then plotting.
I would like to input a character vector (or something like that) which would essentially specify the order in which I want to have the data plotted which would reveal the pattern without the need for a prefix in the label name.
I have made some attempts with "scale_y_discrete" without success. I would also like to do the same thing for the X axis since I've had to use the same "trick" to display the columns in the proper non-alphabetical order which offsets the position of the labels. Any idea on how to get GGplot to display my values as seen in the graph without having to "trick" the software since this is quite time consuming ?
Data + Code
#Assign data to "Stack_Overflow_DummyData"
Stack_Overflow_DummyData <- structure(list(Species = structure(c(8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L), .Label = c("Ani", "Cal",
"Can", "Cau", "Fis", "Ort", "Sem", "Zan"), class = "factor"),
Species_prefix = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L,
2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("ac.Cau",
"ad.Sem", "af.Cal", "ag.Ort", "as.Fis", "at.Ani", "be.Can",
"bf.Zan"), class = "factor"), Dist = structure(c(2L, 3L,
5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L,
3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L
), .Label = c("End", "Ind", "Pan", "Per", "Wid"), class = "factor"),
Region = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Cen", "Col",
"Far", "Nor"), class = "factor"), Region_prefix = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("a.Far", "b.Nor", "c.Cen", "d.Col"), class = "factor"),
Frequency = c(75, 50, 25, 50, 0, 0, 0, 0, 11.1, 22.2, 55.6,
55.6, 11.1, 0, 5.6, 0, 0, 2.7, 36.9, 27.9, 65.8, 54.1, 37.8,
28.8, 0, 0, 0, 3.1, 34.4, 21.9, 78.1, 81.3)), class = "data.frame", row.names = c(NA,
-32L))
# Plot Data With Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
# Balloon plot on the prefixed label columns: fill encodes Dist, point size
# encodes Frequency.
ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  # Sets the grey theme for this plot and removes the grey background from the
  # legend keys. theme_set() is deliberately not used: it changes the
  # session-wide default and returns the *previous* theme, so adding its result
  # to a plot applies whatever theme was active before the call.
  theme_gray() +
  theme(legend.key = element_blank()) +
  # Removes the axis titles
  theme(axis.title = element_blank()) +
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
# Plot Data Without Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
# Balloon plot on the raw label columns (default ordering of the factor
# levels): fill encodes Dist, point size encodes Frequency.
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  # Sets the grey theme for this plot and removes the grey background from the
  # legend keys. theme_set() is deliberately not used: it changes the
  # session-wide default and returns the *previous* theme, so adding its result
  # to a plot applies whatever theme was active before the call.
  theme_gray() +
  theme(legend.key = element_blank()) +
  # Removes the axis titles
  theme(axis.title = element_blank()) +
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
Here below are the graphs
Good Graph.
Using the label prefix trick with the visible pattern in the data:
Wrong Graph (R default).
Without the prefix trick when GGplot automatically orders the data/labels and the graph makes no sense:
To sum up, I would like the Good graph output without having to add a prefix to my labels beforehand.
Many Thanks in advance for your help.
For the axis labels I would define a previous function to override the breaks:
# Label function for discrete scales: strips a leading run of lower-case
# letters followed by a dot (e.g. "a." or "ab.") from each break. Strings
# without such a prefix are returned unchanged.
shlab <- function(lbl_brk) {
  prefix_pattern <- "^[a-z]+\\."
  sub(pattern = prefix_pattern, replacement = "", x = lbl_brk)
}
Then, to change the labels you just have to use scale_x,y_discrete with labels = shlab (if you look at the help of scale_x_discrete you will see that one of the options for labels is A function that takes the breaks as input and returns labels as output).
For the colours, it is enough to change them (values) in scale_fill_manual; for the size of the legend keys, use guides, like so:
library(ggplot2)
library(ggpubr)
# Label function: removes a leading lower-case prefix such as "a." or "ab."
# from each axis break, so the prefixed columns still control the ordering
# while the displayed labels stay clean.
shlab <- function(lbl_brk) {
  sub("^[a-z]+\\.", "", lbl_brk)
}

ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  # Show the labels without their ordering prefix
  scale_x_discrete(labels = shlab) +
  scale_y_discrete(labels = shlab) +
  # One fill colour per Dist level
  scale_fill_manual(values = c("green", "blue", "red", "black", "white")) +
  # Larger points in the fill legend
  guides(fill = guide_legend(override.aes = list(size = 8))) +
  # Sets the grey theme for this plot and removes the grey background from the
  # legend keys. theme_set() is deliberately not used: it changes the
  # session-wide default and returns the *previous* theme, so adding its result
  # to a plot applies whatever theme was active before the call.
  theme_gray() +
  theme(legend.key = element_blank()) +
  theme(axis.title = element_blank()) + # Removes the axis titles
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4) # Add Frequency values next to the circles
UPDATE:
With the new dataset and vector labels:
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
# Balloon plot on the raw label columns; the plotting order of both axes is
# given explicitly through the `limits` of the discrete scales, so no label
# prefix is needed.
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
              size = "Frequency", size.range = c(1, 9), fill = "Dist") +
  scale_y_discrete(limits = c("Cau", "Sem", "Cal", "Ort", "Fis", "Ani", "Can", "Zan")) +
  scale_x_discrete(limits = c("Far", "Nor", "Cen", "Col")) +
  # Sets the grey theme for this plot and removes the grey background from the
  # legend keys. theme_set() is deliberately not used: it changes the
  # session-wide default and returns the *previous* theme, so adding its result
  # to a plot applies whatever theme was active before the call.
  theme_gray() +
  theme(legend.key = element_blank()) +
  # Removes the axis titles
  theme(axis.title = element_blank()) +
  # Add Frequency values next to the circles
  geom_text(aes(label = Frequency), alpha = 1.0, size = 3, nudge_x = 0.4)

ggplot with error bars differentiated by shape of points

I am getting an error message that reads 'Error: All unnamed arguments must be length 1'. I am trying to differentiate my lines by the shape of the points and I keep getting the error above. I want to use all solid lines, differentiated by shape. The journal I want to publish my work in requires black and white instead of color.
The plot I want to modify:
tgc <- structure(
list(
GROUP = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L),
.Label = c("LLL", "LRL", "RLR", "RRR"),
class = "factor"
),
condition = structure(
c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L),
.Label = c("Midline", "No Midline crossing", "Midline crossing"),
class = "factor"
),
names = structure(
c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L),
.Label = c("102", "104"),
class = "factor"
),
Trial_type = structure(
c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("retention", "transfer"),
class = "factor"
),
Training = structure(
c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("right", "left"),
class = "factor"
),
N = c(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8),
NormalizedJerk = c(2060.1571177375, 1092.701687475,
2981.812368875, 1508.28547575, 2089.925186675, 1269.6704558875,
1391.97364445, 914.38364425, 3900.4119165625, 2335.7186722875,
4015.516784, 2563.16723475, 1411.2016892375, 847.952527975,
1853.413925525, 1394.847246875, 6830.81906275, 3915.019566375,
2310.5893110125, 1023.1702538875, 1340.4653657625, 765.2752940875,
5617.967587, 1705.629421875
),
sd = c(1507.9737264907, 946.522319997832,
1403.37144167604, 813.034284948276, 1383.95826055979, 915.414811222361,
580.358119711544, 361.141583497209, 3283.59638643298, 1471.60790379469,
1178.5979495842, 806.56121914288, 701.001824354642, 324.415375522303,
2103.42765701483, 1208.14874080532, 4671.80701687463, 2861.85154237875,
2130.92970547315, 813.649686635084, 489.544827982279, 256.250905151245,
5305.91963495324, 878.475841087918
),
se = c(533.14922392636,
334.646175507445, 496.166731466336, 287.451028112041, 489.303135460484,
323.648010306967, 205.187580982353, 127.682831329662, 1160.9266357632,
520.291964010475, 416.697301221774, 285.16245374901, 247.841571812654,
114.6981559765, 743.673980005258, 427.145083652716, 1651.73321101347,
1011.8173161826, 753.397422485958, 287.668605464989, 173.080233780536,
90.5983763588182, 1875.92587715314, 310.588112170911
),
ci = c(1260.69758461414,
791.31246230768, 1173.24788605882, 679.713672219011, 1157.01806049025,
765.305934151606, 485.191530116531, 301.92191947415, 2745.15527724569,
1230.29499600801, 985.332544042271, 674.302053778614, 586.052191261077,
271.218041235005, 1758.50952839896, 1010.03762375384, 3905.72840792368,
2392.56776402689, 1781.50181629779, 680.228160904389, 409.269718268804,
214.231117892992, 4435.85982330679, 734.424182295757
)
),
row.names = c(NA, -24L),
class = "data.frame"
)
tgc <- summarySE(
data10,
measurevar = "NormalizedJerk",
groupvars = c("GROUP", "condition", "names","Trial_type", "Training")
)
pd <- position_dodge2(0.2)
p <-ggplot(
gc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(
aes(ymin = NormalizedJerk - se, ymax = NormalizedJerk + se),
width = .3,
position = pd
) +
geom_line(
position = pd,
size = 1
) +
geom_point(
aes(shape = Training),
position = pd,
size = 1
)+
scale_colour_manual(
name = "Experimental group",
values = c("#999999","#999999","#000000","#000000")
labels = c("RRR","LLL","LRL","RLR")
) +
scale_shape_manual(
name = "Experimental group",
values = c("19","18","19","18")
) +
scale_linetype_manual(
name = "Experimental group",
values = c("solid","solid","solid","solid"),
labels = c("RRR","LLL","LRL","RLR")
) +
theme_bw() +
facet_wrap(.~condition) +
theme(
axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold")
) +
xlab("Block of trials") +
ylab("Normalized Jerk")
p +
scale_x_discrete(
breaks=c("102","104"),
labels=c("Pretest","Posttest")
)
I modified your code a little. Thanks to Nic3500 for formatting the code.
You needed to add the same labels to the shape manual override as the others and turn the error bar legend off. I also increased the size of the legend because the default seems too small when showing different line types.
ggplot(tgc,
aes(
names,
NormalizedJerk,
group = interaction(Training, Trial_type),
color = interaction(Training, Trial_type),
linetype = interaction(Training, Trial_type),
shape = Training
)
) +
geom_errorbar(aes(ymin=NormalizedJerk-se, ymax=NormalizedJerk+se),
show.legend=FALSE, # <- here
width=.3, position=pd) +
geom_line(position=pd, size = 1) +
geom_point(position=pd, size= 2) +
scale_colour_manual(name = "Experimental group",
values=c("#999999","#999999","#000000","#000000"),
labels=c("RRR","LLL","LRL","RLR")) +
scale_shape_manual(name = "Experimental group", values=c(19,18,19,18),
labels=c("RRR","LLL","LRL","RLR")) + # <- here
scale_linetype_manual(name = "Experimental group",
values=c("solid","dashed","solid","dashed"),
labels=c("RRR","LLL","LRL","RLR")) +
theme_bw()+
facet_wrap(.~condition) +
theme(axis.title.y = element_text(vjust= 1.8, size = 14),
axis.title.x = element_text(vjust= -0.5, size = 14),
axis.title = element_text(face = "bold"),
legend.key.width = grid::unit(1.25, "cm")) + # <- here
xlab("Block of trials") + ylab("Normalized Jerk")

Reordering factor for plotting using forcats and ggplot2 packages from tidyverse

First of all, thanks^13 to tidyverse. I want the bars in the chart below to follow the same factor levels reordered by forcats::fct_reorder (). Surprisingly, I see different order of levels in the data set when View ()ed as when they are displayed in the chart (see below). The chart should illustrate the number of failed students before and after the bonus marks (I want to sort the bars based on the number of failed students before the bonus).
MWE
ggplot (df) +
geom_bar (aes (forcats::fct_reorder (subject, FailNo, .desc= TRUE), FailNo, fill = forcats::fct_rev (Bonus)), position = 'dodge', stat = 'identity') +
theme (axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))
Data output of dput (df)
structure(list(subject = structure(c(1L, 2L, 5L, 6L, 3L, 7L,
4L, 9L, 10L, 8L, 12L, 11L, 1L, 2L, 5L, 6L, 3L, 7L, 4L, 9L, 10L,
8L, 12L, 11L), .Label = c("CAB_1", "DEM_1", "SSR_2", "RRG_1",
"TTP_1", "TTP_2", "IMM_1", "RRG_2", "DEM_2", "VRR_2", "PRS_2",
"COM_2", "MEB_2", "PHH_1", "PHH_2"), class = "factor"), Bonus = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("After", "Before"), class = "factor"),
FailNo = c(29, 28, 20, 18, 15, 13, 12, 8, 5, 4, 4, 2, 21,
16, 16, 14, 7, 10, 10, 5, 3, 4, 4, 1)), .Names = c("subject",
"Bonus", "FailNo"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-24L))
Bar chart
The issue
According to the table above, SSR_2 var should come in the fifth rank and IMM_1 in the sixth, however in the chart we see these two variables swapping their positions. How to sort it right after tidyverse in this case?
Use factor with unique levels for your x -axis.
ggplot (df) +
geom_bar (aes(factor(forcats::fct_reorder
(subject, FailNo, .desc= TRUE),
levels=unique(subject)),
FailNo,
fill = forcats::fct_rev (Bonus)),
position = 'dodge', stat = 'identity') +
theme(axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))
Edited: #dotorate comment
Sort failNo before the bonus
library(dplyr)
df_before_bonus <- df %>% filter(Bonus == "Before") %>% arrange(desc(FailNo))
Use FailNo before the bonus to create the factor
df$subject <- factor(df$subject, levels = df_before_bonus$subject, ordered = TRUE)
Updated plot
ggplot(df) +
geom_bar(aes (x = subject, y = FailNo, fill = as.factor(Bonus)),
position = 'dodge', stat = 'identity') +
theme (axis.text.x=element_text(angle=45, vjust=1.5, hjust=1.5, size = rel (1.2)))

How to generate facetted ggplot graph where each facet has ordered data?

I want to sort my factors (Condition, Parameter and SubjectID) by MeanWeight and plot MeanWeight against SubjectID such that when faceted by Condition and Parameter, MeanWeight appears in descending order.
Here is my solution, which isn't giving me what I want:
dataSummary <- structure(list(SubjectID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("s001",
"s002", "s003", "s004"), class = "factor"), Condition = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "2", "3"), class = "factor"), Parameter = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("(Intercept)", "PrevCorr1", "PrevFail1"), class = "factor"),
MeanWeight = c(-0.389685536725783, 0.200987679398502, -0.808114314421089,
-0.10196105040707, 0.0274188815763494, 0.359978984195839,
-0.554583879312783, 0.643791202050396, -0.145042221940287,
-0.0144598460145723, -0.225804028997856, -0.928152539784374,
0.134025102103562, -0.267448309989731, -1.19980109795115,
0.0587152632631923, 0.0050656268880826, -0.156537446664213
)), .Names = c("SubjectID", "Condition", "Parameter", "MeanWeight"
), row.names = c(NA, 18L), class = "data.frame")
## Order by three variables
orderWeights <- order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight)
## Set factors to the new order. I expect this to sort for each facet when plotting, but it doesn't seem to work.
conditionOrder <- dataSummary$Condition[orderWeights]
dataSummary$Condition <- factor(dataSummary$Condition, levels=conditionOrder)
paramOrder <- dataSummary$Parameter[orderWeights]
dataSummary$Parameter <- factor(dataSummary$Parameter, levels=paramOrder)
sbjOrder <- dataSummary$SubjectID[orderWeights]
dataSummary$SubjectID <- factor(dataSummary$SubjectID, levels=sbjOrder)
## Plot
# Dot plot of MeanWeight per subject, one panel per Parameter x Condition.
ggplot(dataSummary, aes(x = MeanWeight, y = SubjectID)) +
  scale_x_continuous(limits = c(-3, 3)) +
  # Vertical reference line at MeanWeight = 0. geom_vline() is positioned with
  # `xintercept`; `yintercept` belongs to geom_hline().
  geom_vline(xintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
  # Segment from each point back to the reference line
  geom_segment(aes(yend = SubjectID), xend = 0, colour = "grey50") +
  geom_point(size = 2) +
  facet_grid(Parameter ~ Condition, scales = "free_y")
I tried a few other approaches, but they didn't work either:
dataSummary <- dataSummary[order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight),]
or this one
dataSummary <- transform(dataSummary, SubjectID=reorder(Condition, Parameter, SubjectID, MeanWeight))
You can order your data and plot it. However, the labels no longer correspond to Subject IDs, but to the reordered subjects. If that is not what you want, you cannot use faceting but have to plot the parts separately and use e.g. grid.arrange to combine the different plots.
# library() stops with an error if plyr is missing; require() would only
# return FALSE and let the script fail later at ddply().
library(plyr)

## Ordered data
# Within each Condition x Parameter group, replace MeanWeight by its values in
# increasing order. The SubjectID column is left as is, so after this step the
# y positions are ranks within the group rather than the original subjects.
datOrder <- ddply(dataSummary, c("Condition", "Parameter"), function(x) {
  # Nothing to reorder in groups with at most one row
  if (nrow(x) <= 1) return(x)
  x$MeanWeight <- x$MeanWeight[order(x$MeanWeight)]
  x
})

## Plot
ggplot(datOrder, aes(x = MeanWeight, y = SubjectID)) +
  scale_x_continuous(limits = c(-3, 3)) +
  # Vertical reference line at MeanWeight = 0. geom_vline() is positioned with
  # `xintercept`; `yintercept` belongs to geom_hline().
  geom_vline(xintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
  geom_segment(aes(yend = SubjectID), xend = 0, colour = "grey50") +
  geom_point(size = 2) +
  facet_grid(Parameter ~ Condition) +
  scale_y_discrete(name = "Ordered subjects")

ggplot barchart with grouped confidence interval

a <- structure(list(
X1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L),
.Label = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8"), class = "factor"),
X2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("A", "B", "C"), class = "factor"),
value = c(0.03508924, 0.03054929, 0.03820896, 0.18207091, 0.25985142, 0.03909991, 0.03079736,
0.41436334, 0.02957787, 0.03113289, 0.03239794, 0.1691519, 0.16368845, 0.0287741, 0.02443448,
0.33474091, 0.03283068, 0.02668754, 0.03597605, 0.17098721, 0.23048966, 0.0385765, 0.02597068, 0.36917749),
se = c(0.003064016, 0.003189752, 0.003301929, 0.006415592, 0.00825635, 0.003479607,
0.003195332, 0.008754099, 0.005594554, 0.006840959, 0.006098068, 0.012790908, 0.014176414,
0.006249045, 0.005659445, 0.018284739, 0.005051873, 0.004719352, 0.005487301, 0.011454206,
0.01290797, 0.005884275, 0.004738851, 0.014075813)),
.Names = c("X1", "X2", "value", "se"), class = "data.frame", row.names = c(NA, -24L))
I'm plotting the above data (kept in dataset "a"), and I can't get the confidence intervals to sit in the middle of each bar in the grouped chart. My attempts until now have only managed to put lines on the side of each bar, not in the middle like in the geom_errorbar help file. I've tried to manipulate the dodge parameters but it only made it worse.
The chart needs to stay flipped over and in the code below I used geom_linerange but geom_errorbar would be even better.
Another thing I haven't quite managed to do is to change the scale into whole numbers (without multiplying the original table).
I've used the code below on a<-a[1:16,] (the first two groups).
When I use the same code on the full table I get even worse results with the confidence intervals.
Would anyone be able to help? Many thanks in advance.
limits <- aes(ymax = value + se, ymin=value - se)
p<-ggplot(data = a, aes(x = X1, y =value))+
geom_bar(aes(fill=X2),position = "dodge") +
scale_x_discrete(name="")+
scale_fill_manual(values=c("grey80","black","red"))+
scale_y_continuous(name="%")+
theme(axis.text.y = element_text(face='bold'),
legend.position ="top",
legend.title=element_blank())+
coord_flip()
p + geom_linerange(limits)
Try this ,
# Grouped bar chart of `value` by X1, dodged by X2, with error bars centred on
# each bar. The data frame from the question is called `a`.
p <- ggplot(data = a, aes(x = X1, y = value, fill = X2)) +
  # stat = "identity" draws the bars at the supplied `value`; the default
  # counting stat cannot be combined with a y aesthetic.
  geom_bar(stat = "identity", position = position_dodge()) +
  # Error bars run from the bar height up to value + 2 * se; dodging them by
  # the same width as the bars (0.9) centres each one on its bar.
  geom_errorbar(aes(ymax = value + 2 * se, ymin = value, colour = X2),
                position = position_dodge(.9))

# Scales, theme and the flipped orientation requested in the question.
p <- p + scale_x_discrete(name = "") +
  scale_fill_manual(values = c("grey80", "black", "red")) +
  scale_y_continuous(name = "%") +
  theme(axis.text.y = element_text(face = "bold"),
        legend.position = "top",
        legend.title = element_blank()) +
  coord_flip()

Resources