ggplot barchart with grouped confidence interval - r

a <- structure(list(
X1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L),
.Label = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8"), class = "factor"),
X2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("A", "B", "C"), class = "factor"),
value = c(0.03508924, 0.03054929, 0.03820896, 0.18207091, 0.25985142, 0.03909991, 0.03079736,
0.41436334, 0.02957787, 0.03113289, 0.03239794, 0.1691519, 0.16368845, 0.0287741, 0.02443448,
0.33474091, 0.03283068, 0.02668754, 0.03597605, 0.17098721, 0.23048966, 0.0385765, 0.02597068, 0.36917749),
se = c(0.003064016, 0.003189752, 0.003301929, 0.006415592, 0.00825635, 0.003479607,
0.003195332, 0.008754099, 0.005594554, 0.006840959, 0.006098068, 0.012790908, 0.014176414,
0.006249045, 0.005659445, 0.018284739, 0.005051873, 0.004719352, 0.005487301, 0.011454206,
0.01290797, 0.005884275, 0.004738851, 0.014075813)),
.Names = c("X1", "X2", "value", "se"), class = "data.frame", row.names = c(NA, -24L))
I'm plotting the above data (kept in dataset "a"), and I can't get the confidence interals to sit in the middle of the group chart.My attempts until now have only managed to put lines on the side of each bar, not in the middle like in the geom_errorbar helpfile.I've tried to manipulate the dodge parameters but it only made it worse.
The chart needs to stay flipped over and in the code below I used geom_linerange but geom_errorbar would be even better.
Another thing I haven't quite managed to do is to change the scale into whole numbers (without muliplying the original table ).
I've used the code below on a<-a[1:16,] (the first two groups).
When I use the same code on the full table I get even worse results with the confidence intervals.
Would anyone be able to help? Many thanks in advance.
limits <- aes(ymax = value + se, ymin=value - se)
p<-ggplot(data = a, aes(x = X1, y =value))+
geom_bar(aes(fill=X2),position = "dodge") +
scale_x_discrete(name="")+
scale_fill_manual(values=c("grey80","black","red"))+
scale_y_continuous(name="%")+
theme(axis.text.y = element_text(face='bold'),
legend.position ="top",
legend.title=element_blank())+
coord_flip()
p + geom_linerange(limits)

Try this ,
p<-ggplot(data = df, aes(x = X1, y =value,fill= X2))+
geom_bar(position=position_dodge()) +
geom_errorbar(aes(ymax = value + 2* se, ymin=value,colour = X2),position=position_dodge(.9))
p <- p + scale_x_discrete(name="")+
scale_fill_manual(values=c("grey80","black","red"))+
scale_y_continuous(name="%")+
theme(axis.text.y = element_text(face='bold'),
legend.position ="top",
legend.title=element_blank())+
coord_flip()

Related

How to prevent R from alphabetically ranking data in ggplot and specify the order in which data is plotted (Data + Code + Graphs provided)?

I'm trying to fix an issue with my GGBalloonPlot graph with regards to how R processes the axis labels.
By default R plots the data using the labels ranked in reverse alphabetical order but to reveal the pattern of the data, the data need to be plotted in a specific order. The only way I've been able to do trick the software is by manually adding a prefix to each label in my .csv table so that R would rank them properly in my output. This is time consuming since I need to manually order the data first before adding the prefix and then plotting.
I would like to input a character vector (or something like that) which would essentially specify the order in which I want to have the data plotted which would reveal the pattern without the need for a prefix in the label name.
I have made some attempts with "scale_y_discrete" without success. I would also like to do the same thing for the X axis since I've had to use the same "trick" to display the columns in the proper non-alphabetical order which offsets the position of the labels. Any idea on how to get GGplot to display my values as seen in the graph without having to "trick" the software since this is quite time consuming ?
Data + Code
#Assign data to "Stack_Overflow_DummyData"
Stack_Overflow_DummyData <- structure(list(Species = structure(c(8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L,
7L, 4L, 8L, 3L, 1L, 5L, 6L, 2L, 7L, 4L), .Label = c("Ani", "Cal",
"Can", "Cau", "Fis", "Ort", "Sem", "Zan"), class = "factor"),
Species_prefix = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L,
1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L,
2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("ac.Cau",
"ad.Sem", "af.Cal", "ag.Ort", "as.Fis", "at.Ani", "be.Can",
"bf.Zan"), class = "factor"), Dist = structure(c(2L, 3L,
5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L,
3L, 5L, 2L, 1L, 1L, 4L, 5L, 2L, 3L, 5L, 2L, 1L, 1L, 4L, 5L
), .Label = c("End", "Ind", "Pan", "Per", "Wid"), class = "factor"),
Region = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Cen", "Col",
"Far", "Nor"), class = "factor"), Region_prefix = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("a.Far", "b.Nor", "c.Cen", "d.Col"), class = "factor"),
Frequency = c(75, 50, 25, 50, 0, 0, 0, 0, 11.1, 22.2, 55.6,
55.6, 11.1, 0, 5.6, 0, 0, 2.7, 36.9, 27.9, 65.8, 54.1, 37.8,
28.8, 0, 0, 0, 3.1, 34.4, 21.9, 78.1, 81.3)), class = "data.frame", row.names = c(NA,
-32L))
# Plot Data With Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix",
size = "Frequency", size.range = c(1, 9), fill = "Dist") +
theme_set(theme_gray() +
theme(legend.key=element_blank())) +
# Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) +
# Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
# Plot Data Without Prefix Trick
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
size = "Frequency", size.range = c(1, 9), fill = "Dist") +
theme_set(theme_gray() +
theme(legend.key=element_blank())) +
# Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) +
# Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4)
# Add Frequency Values Next to the circles
Here below are the graphs
Good Graph.
Using the label prefix trick with the visible pattern in the data:
Wrong Graph (R default).
Without the prefix trick when GGplot automatically orders the data/labels and the graph makes no sense:
To sum up, I would like the Good graph output without having to have to previously add a prefix in my labels.
Many Thanks in advance for your help.
For the axis labels I would define a previous function to override the breaks:
shlab <- function(lbl_brk){
sub("^[a-z]+\\.","",lbl_brk) # removes the starts of strings as a. or ab.
}
Then, to change the labels you just have to use scale_x,y_discrete with labels = shlab (if you look at the help of scale_x_discrete you will see that one of the options for labels is A function that takes the breaks as input and returns labels as output).
For the colours would be enough to change them (values) in scale_fill_manual and for the sizes, using guides so:
library(ggplot2)
library(ggpubr)
shlab <- function(lbl_brk){
sub("^[a-z]+\\.","",lbl_brk)
}
ggballoonplot(Stack_Overflow_DummyData, x = "Region_prefix", y = "Species_prefix", size = "Frequency", size.range = c(1, 9), fill = "Dist") +
scale_x_discrete(labels = shlab) +
scale_y_discrete(labels = shlab) +
scale_fill_manual(values = c("green", "blue", "red", "black", "white")) +
guides(fill = guide_legend(override.aes = list(size=8))) +
theme_set(theme_gray() + theme(legend.key=element_blank())) + # Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) + # Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4) # Add Frequency Values Next to the circles
UPDATE:
With the new dataset and vector labels:
library(ggplot2)
library(ggpubr)
# make color base on Dist, size and alpha dependent on Frequency
ggballoonplot(Stack_Overflow_DummyData, x = "Region", y = "Species",
size = "Frequency", size.range = c(1, 9), fill = "Dist") +
scale_y_discrete(limits = c("Cau", "Sem", "Cal", "Ort", "Fis", "Ani", "Can", "Zan")) +
scale_x_discrete(limits = c("Far", "Nor", "Cen", "Col")) +
theme_set(theme_gray() +
theme(legend.key=element_blank())) +
# Sets Grey Theme and removes grey background from legend panel
theme(axis.title = element_blank()) +
# Removes X axis title (Region)
geom_text(aes(label=Frequency), alpha=1.0, size=3, nudge_x = 0.4)

How do I make a dot plot with a continuous x-axis (ggplot2)?

I'm trying to create a vertically oriented double plot with a line plot above and dot plot below, with both on the same (continuous, date) x-axis. I've successfully placed the two plots on a common axis and finished the (upper) line plot, but when I try to change the (lower) dot plot's x-axis from categorical to continuous, all my dots bunch up in the middle of the plot.
I only include here my code for the dot plot for simplicity, but if it turns out I need to show you the full double plot, I can do that.
Here's a small subset of my data, then my code, as far as I've gotten with it:
data <- structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("11/11/2016", "12/16/2016", "12/2/2016", "12/23/2016"
), class = "factor"), factor = c(2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L
), temp = c(-19.85, -19.94, -20.77, -21.3, -21.71, -21.88, -22.03,
-22.74, -22.86, -18.88, -19.02, -19.22, -19.32, -19.32, -19.55,
-19.68, -20.23, -20.32, -21.37, -16.63, -19.01, -19.67, -20.47,
-21.14, -21.23, -23.01, -24.43, -24.61, -24.76, -15.9, -18.87,
-19.02, -19.16, -19.44, -19.62, -22.38, -24.37, -24.92, -26.9
)), .Names = c("date", "factor", "temp"), class = "data.frame", row.names = c(NA,
-39L))
library(ggplot2)
library(scales)
#format date and order date levels (the second line here gives me a warning, but seems to do what I want it to)..
data$date <- as.Date(data$date, "%m/%d/%Y")
data$date.chr <- factor(data$date, as.character(data$date))
data$date.chr <- as.Date(data$date.chr)
#now plot..
ggplot(data, aes(x = date.chr, fill = factor(factor), y = temp)) +
geom_dotplot(binaxis = 'y', stackdir = 'center', method = 'histodot', binwidth = 0.3, position=position_dodge(0.8)) +
scale_x_date(date_breaks = "2 weeks", labels = date_format("%e %b"), limits = as.Date(c("2016-11-04","2016-12-23"))) +
labs(title="", x="", y="response temp (°C)") +
theme_minimal() +
theme(axis.title.y = element_text(vjust=1)) +
theme(legend.position="top") +
guides(fill = guide_legend(override.aes = list(size=10)))
(My session info:
R version 3.3.2 (2016-10-31)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1)
Any suggestions how I can (dot) plot this data on a continuous x-axis? (again, so I can line it up with the date axis in a plot above it)
I'm not sure if this is what you are looking for, but let's see:
data$date <- as.Date(data$date, "%m/%d/%Y")
data$date.chr <- factor(data$date)
#create dummy variable to get both the position and "filling" right
data$datefact <- paste(data$factor,data$date.chr)
The trick here is to set the "group" argument in geom_dotplot to the dummy variable created before:
ggplot(data, aes(x = date, y = temp)) +
# geom_point() +
geom_dotplot(aes(x = date, group = datefact, fill = factor(factor)),binaxis = 'y',
stackdir = 'center',
method = 'histodot',
binwidth = 0.3)+
scale_x_date(date_breaks = "2 weeks", labels = date_format("%e %b"), limits = as.Date(c("2016-11-04","2016-12-23"))) +
labs(title="", x="", y="response temp (°C)") +
theme_minimal() +
theme(axis.title.y = element_text(vjust=1)) +
theme(legend.position="top") +
guides(fill = guide_legend(override.aes = list(size=10)))
giving:
Is this what you wanted ?

Add text to plot with facetted bar chart

My question is related to this question. I want "2014" in the 4-year facet. I tried to repeat but my code doesn't give what I want.
Annotating text on individual facet in ggplot2
This is my data
structure(list(Rot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2-year",
"3-year", "4-year"), class = "factor"), Rot.Herb = structure(c(3L,
3L, 4L, 4L, 13L, 13L, 14L, 14L, 5L, 5L, 6L, 6L, 9L, 9L, 10L,
10L, 15L, 15L, 16L, 16L, 1L, 1L, 2L, 2L, 7L, 7L, 8L, 8L, 11L,
11L, 12L, 12L, 17L, 17L, 18L, 18L), .Label = c("A4-conv", "A4-low",
"C2-conv", "C2-low", "C3-conv", "C3-low", "C4-conv", "C4-low",
"O3-conv", "O3-low", "O4-conv", "O4-low", "S2-conv", "S2-low",
"S3-conv", "S3-low", "S4-conv", "S4-low"), class = "factor"),
variable = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Diversity",
"Evenness"), class = "factor"), N = c(4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(0.78537789925, 0.613408315,
1.305194686, 0.79519430975, 0.4481728555, 0.30608817425,
1.20978861475, 0.8580643725, 0.92387324875, 0.630166121,
0.945954185, 0.561172324, 1.43952456275, 0.8616864655, 1.23679146725,
0.831737624, 1.033474108, 0.80689293925, 0.9910142125, 0.79342098075,
1.175512223, 0.6293940245, 0.981614832, 0.62342189825, 1.351710013,
0.805075937, 1.6598348325, 0.7983622545, 1.01606920875, 0.5751418795,
1.0500365255, 0.56408326225, 1.07162937725, 0.6756859865,
0.45699816625, 0.44444147325), sd = c(0.354077266902404,
0.208934910331856, 0.169501822767995, 0.0774319459391732,
0.737366460962239, 0.40697977697835, 0.494107033311986, 0.11906912863268,
0.491492768082854, 0.34236657107712, 0.219739438843007, 0.205905593411204,
0.319301583035043, 0.0696484379979274, 0.0563293598951725,
0.0978700910274188, 0.446850757364563, 0.175073468716825,
0.426859848850874, 0.180469101499932, 0.526842123835502,
0.200470277385505, 0.574885944755375, 0.27189545397305, 0.39621771945215,
0.150798258847229, 0.275863362594154, 0.111178397407429,
0.254811233135664, 0.158920851982914, 0.198698241334475,
0.0730606635175717, 0.717706309307313, 0.453776579066358,
0.574276936403411, 0.513758415496589), se = c(0.177038633451202,
0.104467455165928, 0.0847509113839974, 0.0387159729695866,
0.368683230481119, 0.203489888489175, 0.247053516655993,
0.0595345643163399, 0.245746384041427, 0.17118328553856,
0.109869719421504, 0.102952796705602, 0.159650791517521,
0.0348242189989637, 0.0281646799475863, 0.0489350455137094,
0.223425378682282, 0.0875367343584126, 0.213429924425437,
0.090234550749966, 0.263421061917751, 0.100235138692753,
0.287442972377688, 0.135947726986525, 0.198108859726075,
0.0753991294236146, 0.137931681297077, 0.0555891987037145,
0.127405616567832, 0.0794604259914568, 0.0993491206672376,
0.0365303317587859, 0.358853154653656, 0.226888289533179,
0.287138468201705, 0.256879207748294), ci = c(0.563415944919255,
0.332462066715199, 0.26971522480343, 0.123211505132525, 1.1733145846647,
0.647595643784969, 0.786234551289211, 0.189465554245211,
0.782074671929471, 0.544781614588516, 0.349654482635521,
0.327641747494367, 0.508080071600555, 0.110826207087643,
0.089632581638694, 0.155733154793995, 0.71103927089404, 0.278580956835532,
0.679229274424713, 0.287166612643164, 0.838323385234058,
0.318992946792351, 0.914771825423139, 0.432646341459985,
0.630470808679215, 0.23995368085579, 0.438960169525453, 0.176909640028318,
0.40546153371869, 0.252878539112781, 0.316173242000635, 0.116255819336536,
1.14203089616693, 0.722059798737006, 0.91380275723334, 0.817504285602766
)), .Names = c("Rot", "Rot.Herb", "variable", "N", "value",
"sd", "se", "ci"), row.names = c(NA, -36L), class = "data.frame")
and the code to graph
p <- ggplot(Shannon.long2, aes(x=Rot.Herb, y=value, fill=factor(variable)))+
geom_bar(stat="identity", position="dodge")+
scale_fill_brewer(palette = "Set1")+
theme_bw() +
theme(panel.grid.major=element_blank()) +
facet_grid(~Rot, scales = "free_x", space="free_x")+
theme(legend.title=element_blank(),legend.text=element_text(size=20),legend.position="top")+
geom_errorbar(aes(ymin=value-se, ymax=value+se), size=0.5, width=.25,position=position_dodge(.9))+
xlab("\nTreatment") +
theme(axis.title = element_text(size=24,face="bold", vjust=4), axis.text.x = element_text(size=20,angle = 90, hjust = 1)) +
ylab("Shannon's H' and E'") +
theme(axis.title = element_text(size=24,face="bold", vjust=2), axis.text.y = element_text(size=20, color="black"))+
theme(strip.text.x = element_text(colour = "black", size = 20), strip.background = element_rect(fill = "white"))
produced graph (please don't mind the "2014" on the y-axis).
New code to annotate 2014, with help from eipi10
ann_text <- data.frame(x = "S4-conv",y = 1.75,lab = "2014", Rot.Herb=NA,
value=NA, variable=NA,
N=NA, sd=NA, se=NA, ci=NA,
Rot = factor("4-year",levels = c("2-year","3-year","4-year")))
I got an error saying Error: Discrete value supplied to continuous scale after I run p + geom_text(data = ann_text,label = "2014"). Please see what have been wrong with my code and data format. Thanks.
It turns out the issue is that when you include value=NA in ann_text it gets interpreted as logical (rather than numeric, which is its mode in Shannon.long2), causing the error because ggplot expects a numeric variable rather than a categorical one. Set value=NA_real_ (in addition to NA, R has class-specific missing value constants; see ?NA for more info) in ann_text to ensure value is interpreted as numeric and resolve the error. Or set value to any number, e.g., value=0.
In the example below, I've removed all of the theme and lab statements to shorten the code down to the essentials:
p = ggplot(Shannon.long2, aes(x=Rot.Herb, y=value, fill=factor(variable))) +
geom_bar(stat="identity", position="dodge") +
geom_errorbar(aes(ymin=value-se, ymax=value+se), size=0.5, width=.25,position=position_dodge(.9)) +
facet_grid(~Rot, scales = "free_x", space="free_x")
ann_text <- data.frame(x = "S4-conv", y = 1.75, lab = "2014", Rot.Herb=NA,
value=NA_real_, variable=NA)
p + geom_text(data = ann_text, aes(label=lab, x, y))
Note that you also need to feed x and y values to geom_text to provide the label location.
Another option would be to just use the same x and y variable names as in your original data frame, since ggplot already knows these names and has scaled the graph based on them. Now the only missing column we need to add is variable:
ann_text <- data.frame(Rot.Herb = "S4-conv", value = 1.75, lab = "2014", variable=NA)
p + geom_text(data = ann_text, aes(label=lab, Rot.Herb, value))

How to generate facetted ggplot graph where each facet has ordered data?

I want to sort my factors (Condition, Parameter and SubjectID) by MeanWeight and plot MeanWeight against SubjectID such that when faceted by Condition and Parameter, MeanWeight appears in descending order.
Here is my solution, which isn't giving me what I want:
dataSummary <- structure(list(SubjectID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("s001",
"s002", "s003", "s004"), class = "factor"), Condition = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "2", "3"), class = "factor"), Parameter = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("(Intercept)", "PrevCorr1", "PrevFail1"), class = "factor"),
MeanWeight = c(-0.389685536725783, 0.200987679398502, -0.808114314421089,
-0.10196105040707, 0.0274188815763494, 0.359978984195839,
-0.554583879312783, 0.643791202050396, -0.145042221940287,
-0.0144598460145723, -0.225804028997856, -0.928152539784374,
0.134025102103562, -0.267448309989731, -1.19980109795115,
0.0587152632631923, 0.0050656268880826, -0.156537446664213
)), .Names = c("SubjectID", "Condition", "Parameter", "MeanWeight"
), row.names = c(NA, 18L), class = "data.frame")
## Order by three variables
orderWeights <- order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight)
## Set factors to the new order. I expect this to sort for each facet when plotting, but it doesn't seem to work.
conditionOrder <- dataSummary$Condition[orderWeights]
dataSummary$Condition <- factor(dataSummary$Condition, levels=conditionOrder)
paramOrder <- dataSummary$Parameter[orderWeights]
dataSummary$Parameter <- factor(dataSummary$Parameter, levels=paramOrder)
sbjOrder <- dataSummary$SubjectID[orderWeights]
dataSummary$SubjectID <- factor(dataSummary$SubjectID, levels=sbjOrder)
## Plot
ggplot(dataSummary, aes(x=MeanWeight, y=SubjectID)) +
scale_x_continuous(limits=c(-3, 3)) +
geom_vline(yintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
geom_segment(aes(yend=SubjectID), xend=0, colour="grey50") +
geom_point(size=2) +
facet_grid(Parameter~Condition, scales="free_y")
I tried a few other approaches, but they didn't work either:
dataSummary <- dataSummary[order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight),]
or this one
dataSummary <- transform(dataSummary, SubjectID=reorder(Condition, Parameter, SubjectID, MeanWeight))
You can order your data and plot it. However, the labels no longer correspond to Subject ID's, but to the reordered subjects. If that is not what you want, you cannot use faceting but have to plot the parts separately and use e.g.grid.arrangeto combind the different plots.
require(plyr)
## Ordered data
datOrder <- ddply(dataSummary, c("Condition", "Parameter"), function(x){
if (nrow(x)<=1) return(x)
x$MeanWeight <- x$MeanWeight[order(x$MeanWeight)]
x
})
## Plot
ggplot(datOrder, aes(x=MeanWeight, y=SubjectID)) +
scale_x_continuous(limits=c(-3, 3)) +
geom_vline(yintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
geom_segment(aes(yend=SubjectID), xend=0, colour="grey50") +
geom_point(size=2) +
facet_grid(Parameter~Condition) +
scale_y_discrete(name="Ordered subjects")

placing linear line based on the aggregate data in ggplot2

dput(x)
structure(list(Date = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L), .Label = c("1/1/2012", "2/1/2012", "3/1/2012"
), class = "factor"), Server = structure(c(1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"),
Storage = c(10000L, 20000L, 30000L, 15000L, 15000L, 25000L,
35000L, 15700L, 16000L, 27000L, 37000L, 16700L)), .Names = c("Date",
"Server", "Storage"), class = "data.frame", row.names = c(NA,
-12L))
I would like to create a stack bar x=Date, y=Storage and alos place a linear line based on the total storage.
I have come up with this ggplot line:
ggplot(x, aes(x=Date, y=Storage)) + geom_bar(aes(x=Date,y=Storage,fill=Server), stat="identity", position="stack") + geom_smooth(aes(group=1),method="lm", size=2, color="red")
It kinda works but linear line is not based on total storage for a given Date on the date frame x. Is there an easy way to do this?
Often the easiest way is just to calculate the values outside of ggplot2. So calculate the totals:
dd = as.data.frame(tapply(x$Storage, x$Date, sum))
dd$Date = rownames(dd)
colnames(dd)[1] = "Storage"
then add a geom_smooth call but specify the data:
ggplot(x, aes(x=Date, y=Storage)) +
geom_bar(aes(x=Date,y=Storage, fill=Server), stat="identity", position="stack") +
geom_smooth(data = dd, aes(x=Date, y=Storage, group=1),method="lm")

Resources