placing linear line based on the aggregate data in ggplot2 - r

dput(x)
structure(list(Date = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L), .Label = c("1/1/2012", "2/1/2012", "3/1/2012"
), class = "factor"), Server = structure(c(1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"),
Storage = c(10000L, 20000L, 30000L, 15000L, 15000L, 25000L,
35000L, 15700L, 16000L, 27000L, 37000L, 16700L)), .Names = c("Date",
"Server", "Storage"), class = "data.frame", row.names = c(NA,
-12L))
I would like to create a stack bar x=Date, y=Storage and alos place a linear line based on the total storage.
I have come up with this ggplot line:
ggplot(x, aes(x=Date, y=Storage)) + geom_bar(aes(x=Date,y=Storage,fill=Server), stat="identity", position="stack") + geom_smooth(aes(group=1),method="lm", size=2, color="red")
It kinda works but linear line is not based on total storage for a given Date on the date frame x. Is there an easy way to do this?

Often the easiest way is just to calculate the values outside of ggplot2. So calculate the totals:
dd = as.data.frame(tapply(x$Storage, x$Date, sum))
dd$Date = rownames(dd)
colnames(dd)[1] = "Storage"
then add a geom_smooth call but specify the data:
ggplot(x, aes(x=Date, y=Storage)) +
geom_bar(aes(x=Date,y=Storage, fill=Server), stat="identity", position="stack") +
geom_smooth(data = dd, aes(x=Date, y=Storage, group=1),method="lm")

Related

Hot to split geom_col by different groups?

I am trying to split the graph below by countries. It shows the share of education comparing 2000 with 2020. But i would like to see this comparison by countries, first germany 2000 compared with germany 2020 next to each other, then the same for the United Kingdom next to them. My guess is that i have to do it with position = "dodge", but i am not sure where to include it.
Here is my code:
ggplot(df, aes(y=share, x=country,fill= education)) +
geom_col(aes(time))
Here is the data:
df= structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Germany", "United Kingdom"
), class = "factor"), education = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("High", "Mid", "Low"
), class = "factor"), time = structure(c(1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("2000", "2020"), class = "factor"),
share = c(33.2845576041362, 56.9310311189152, 46.4851806022038,
41.3352189028342, 49.8323036187114, 38.9338040600176, 39.9891518069022,
48.1659773543969, 47.2698138789597, 38.9375077036854, 44.5437053326003,
43.9618838189481)), row.names = c(NA, -12L), class = c("tbl_df",
"tbl", "data.frame"))
We need position outside the aes in geom_col. By default, the value is "stack"
library(ggplot2)
ggplot(df, aes(y=share, x=country,fill= education)) +
geom_col(position = "dodge") +
facet_wrap(~ time)
df %>%
ggplot(aes(x = time, fill = education, y = share)) +
geom_col() +
facet_wrap(~country)
you may need to use facet_wrap():
library(ggplot2)
ggplot(df, aes(y=share, x=country,fill= education)) +
geom_col(aes(time), position = "dodge") +
facet_wrap(~country)
the untidy way is to map country to colour aesthetic (it's not):
library(ggplot2)
ggplot(df, aes(y=share, fill= education, colour = country)) +
geom_col(aes(time), position = "dodge")

Having Two or More Line Breaks and Italicized Words in Axis Labels in a 'ggplot()' Plot in R

I have the following data:
df <- structure(list(Site = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Permafrost", "Palsa",
"Palsa Hollow", "Rich Sphagnum Lawn", "Tall Graminoid Fen"), class = "factor"),
Depth = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L), .Label = c("Upper Depth", "Lower Depth"
), class = "factor"), ug.Al.m2 = c(0.093375394, 0.15684537,
0.025747986, 0.031130205, 0.074247144, 0.054740061, 0.006671475,
0.002208133, 0.003427595, 0.001447068, 0.013960114, 0.008988422,
0.047630561, 0.005434406, 0.041627689, 0.004127627, 0.013713378,
0.00501951, 0.512382579, 0.628336756, 0.293063584, 0.460299194,
0.188002926, 0.385744659, 0.220549738, 0.003135834, 0.006755556,
0.012846966, 0.008662843, 0.0064347, 0.004951768)), row.names = c(NA,
-31L), class = "data.frame")
I am using it to make a barplot:
library (cowplot)
library (ggplot2)
library (RColorBrewer)
X_Axis_Labels <- c("Permafrost", "Palsa", expression(atop("Palsa", "Hollows")), expression(atop("Rich", italic("Sphagnum"), "Lawn")), expression(atop("Tall", "Graminoid", "Fen")))
Legend_Labels <- c("Permafrost", "Palsa", "Palsa Hollows", expression(paste("Rich ", italic("Sphagnum"), " Lawn")), "Tall Graminoid Fen")
Palette1 <- c(brewer.pal(11, "RdBu")[c(11,10,9,8,7)])
ggplot(df, aes(x = Site, y = ug.Al.m2, fill = Site)) +
stat_summary(geom = "bar", width = 0.6, fun = mean, colour = "black") +
stat_summary(geom = "errorbar", width = 0.2, fun.data = mean_se) +
ggtitle("Total Aluminum Concentrations in Permafrost Peatland Communities") +
scale_x_discrete(labels = X_Axis_Labels) +
scale_fill_manual(values = Palette1, labels = Legend_Labels) +
ylab(expression(paste("Aluminum Concentration, ", mu, "g m" ^ "-2"))) +
xlab("Site") +
theme_cowplot(13)
Here's what the graph looks like:
I'm having a lot of trouble getting all three lines of the x axis labels to appear on my graph. The word 'Lawn', which should appear under 'Sphagnum', is lost. Since the word 'Sphagnum' needs to be italicized, I can't simply use the standard line break (\n). I've also tried playing with the plot margins to no avail.
Is there a solution to this problem?
Thank you!
Try this approach with ggtext and element_markdown(). You can use ** for italic and <br> for the break line. You can customize at any level you wish. Here the code:
library (cowplot)
library (ggplot2)
library (RColorBrewer)
library(ggtext)
X_Axis_Labels <- c("Permafrost", "Palsa", "Palsa<br>Hollows", "Rich<br>*Sphagnum*<br>Lawn",
"Tall<br>*Graminoid*<br>Fen")
Legend_Labels <- c("Permafrost", "Palsa", "Palsa Hollows", expression(paste("Rich ", italic("Sphagnum"), " Lawn")), "Tall Graminoid Fen")
Palette1 <- c(brewer.pal(11, "RdBu")[c(11,10,9,8,7)])
ggplot(df, aes(x = Site, y = ug.Al.m2, fill = Site)) +
stat_summary(geom = "bar", width = 0.6, fun = mean, colour = "black") +
stat_summary(geom = "errorbar", width = 0.2, fun.data = mean_se) +
ggtitle("Total Aluminum Concentrations in Permafrost Peatland Communities") +
scale_x_discrete(labels = X_Axis_Labels) +
scale_fill_manual(values = Palette1, labels = Legend_Labels) +
ylab(expression(paste("Aluminum Concentration, ", mu, "g m" ^ "-2"))) +
xlab("Site") +
theme_cowplot(13)+
theme(axis.text.x = element_markdown())
Output:

r- ggplot decrease number of intervals of axis or spacing the axe tiks

I've made a group plot of time series with ggplot with this syntax:
ggplot(Tur_flow, aes(x=time, group=parameter, colour=parameter))
+ geom_point(aes(y=value), size=1)
+ stat_smooth(aes(y=value), method=lm)
+ facet_grid(parameter ~ Section, scale="free_y")
+ theme_minimal()
+ theme(text = element_text(size=16))
dput(head(Tur_flow))
structure(list(Section = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("S-5", "S-50", "S+5", "S+50"), class = "factor"), parameter = structure(c(3L,
3L, 3L, 3L, 3L, 3L), .Label = c("Discharge", "Mean_Velocity",
"T_15", "T_25", "T_65", "Water_Depth"), class = "factor"), time = structure(c(6L, 13L, 20L, 27L, 34L, 41L), .Label = c("11:59:55", "11:59:56",
"11:59:58", "11:59:59", "12:00:00", "12:00:02", "12:00:05", "12:00:55",
"12:00:56", "12:00:58", "12:00:59", "12:01:00", "12:01:01", "12:01:05",
"12:01:55", "12:01:56............. "8.30", "8.31", "8.41", "8.54", "8.94", "800.31", "822.01", "828.77", "839.30", "846.11", "847.60", "8497.25", "894.21", "91.66", "91.67", "91.68", "91.90", "92.08", "92.23", "92.54", "93.23", "974.50", "N/A"), class = "factor")), .Names = c("Section", "parameter",
"time", "value"), row.names = c(NA, 6L), class = "data.frame")
How can I reduce the interval of both x and y axis? I mean spacing the axes? The x_axis data is time?
On y-axis how can I reduce decimal numbers?

How to generate facetted ggplot graph where each facet has ordered data?

I want to sort my factors (Condition, Parameter and SubjectID) by MeanWeight and plot MeanWeight against SubjectID such that when faceted by Condition and Parameter, MeanWeight appears in descending order.
Here is my solution, which isn't giving me what I want:
dataSummary <- structure(list(SubjectID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("s001",
"s002", "s003", "s004"), class = "factor"), Condition = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("1", "2", "3"), class = "factor"), Parameter = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("(Intercept)", "PrevCorr1", "PrevFail1"), class = "factor"),
MeanWeight = c(-0.389685536725783, 0.200987679398502, -0.808114314421089,
-0.10196105040707, 0.0274188815763494, 0.359978984195839,
-0.554583879312783, 0.643791202050396, -0.145042221940287,
-0.0144598460145723, -0.225804028997856, -0.928152539784374,
0.134025102103562, -0.267448309989731, -1.19980109795115,
0.0587152632631923, 0.0050656268880826, -0.156537446664213
)), .Names = c("SubjectID", "Condition", "Parameter", "MeanWeight"
), row.names = c(NA, 18L), class = "data.frame")
## Order by three variables
orderWeights <- order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight)
## Set factors to the new order. I expect this to sort for each facet when plotting, but it doesn't seem to work.
conditionOrder <- dataSummary$Condition[orderWeights]
dataSummary$Condition <- factor(dataSummary$Condition, levels=conditionOrder)
paramOrder <- dataSummary$Parameter[orderWeights]
dataSummary$Parameter <- factor(dataSummary$Parameter, levels=paramOrder)
sbjOrder <- dataSummary$SubjectID[orderWeights]
dataSummary$SubjectID <- factor(dataSummary$SubjectID, levels=sbjOrder)
## Plot
ggplot(dataSummary, aes(x=MeanWeight, y=SubjectID)) +
scale_x_continuous(limits=c(-3, 3)) +
geom_vline(yintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
geom_segment(aes(yend=SubjectID), xend=0, colour="grey50") +
geom_point(size=2) +
facet_grid(Parameter~Condition, scales="free_y")
I tried a few other approaches, but they didn't work either:
dataSummary <- dataSummary[order(dataSummary$Condition, dataSummary$Parameter, dataSummary$SubjectID, -dataSummary$MeanWeight),]
or this one
dataSummary <- transform(dataSummary, SubjectID=reorder(Condition, Parameter, SubjectID, MeanWeight))
You can order your data and plot it. However, the labels no longer correspond to Subject ID's, but to the reordered subjects. If that is not what you want, you cannot use faceting but have to plot the parts separately and use e.g.grid.arrangeto combind the different plots.
require(plyr)
## Ordered data
datOrder <- ddply(dataSummary, c("Condition", "Parameter"), function(x){
if (nrow(x)<=1) return(x)
x$MeanWeight <- x$MeanWeight[order(x$MeanWeight)]
x
})
## Plot
ggplot(datOrder, aes(x=MeanWeight, y=SubjectID)) +
scale_x_continuous(limits=c(-3, 3)) +
geom_vline(yintercept = 0.0, size = 0.1, colour = "#a9a9a9", linetype = "solid") +
geom_segment(aes(yend=SubjectID), xend=0, colour="grey50") +
geom_point(size=2) +
facet_grid(Parameter~Condition) +
scale_y_discrete(name="Ordered subjects")

ggplot barchart with grouped confidence interval

a <- structure(list(
X1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L),
.Label = c("V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8"), class = "factor"),
X2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("A", "B", "C"), class = "factor"),
value = c(0.03508924, 0.03054929, 0.03820896, 0.18207091, 0.25985142, 0.03909991, 0.03079736,
0.41436334, 0.02957787, 0.03113289, 0.03239794, 0.1691519, 0.16368845, 0.0287741, 0.02443448,
0.33474091, 0.03283068, 0.02668754, 0.03597605, 0.17098721, 0.23048966, 0.0385765, 0.02597068, 0.36917749),
se = c(0.003064016, 0.003189752, 0.003301929, 0.006415592, 0.00825635, 0.003479607,
0.003195332, 0.008754099, 0.005594554, 0.006840959, 0.006098068, 0.012790908, 0.014176414,
0.006249045, 0.005659445, 0.018284739, 0.005051873, 0.004719352, 0.005487301, 0.011454206,
0.01290797, 0.005884275, 0.004738851, 0.014075813)),
.Names = c("X1", "X2", "value", "se"), class = "data.frame", row.names = c(NA, -24L))
I'm plotting the above data (kept in dataset "a"), and I can't get the confidence interals to sit in the middle of the group chart.My attempts until now have only managed to put lines on the side of each bar, not in the middle like in the geom_errorbar helpfile.I've tried to manipulate the dodge parameters but it only made it worse.
The chart needs to stay flipped over and in the code below I used geom_linerange but geom_errorbar would be even better.
Another thing I haven't quite managed to do is to change the scale into whole numbers (without muliplying the original table ).
I've used the code below on a<-a[1:16,] (the first two groups).
When I use the same code on the full table I get even worse results with the confidence intervals.
Would anyone be able to help? Many thanks in advance.
limits <- aes(ymax = value + se, ymin=value - se)
p<-ggplot(data = a, aes(x = X1, y =value))+
geom_bar(aes(fill=X2),position = "dodge") +
scale_x_discrete(name="")+
scale_fill_manual(values=c("grey80","black","red"))+
scale_y_continuous(name="%")+
theme(axis.text.y = element_text(face='bold'),
legend.position ="top",
legend.title=element_blank())+
coord_flip()
p + geom_linerange(limits)
Try this ,
p<-ggplot(data = df, aes(x = X1, y =value,fill= X2))+
geom_bar(position=position_dodge()) +
geom_errorbar(aes(ymax = value + 2* se, ymin=value,colour = X2),position=position_dodge(.9))
p <- p + scale_x_discrete(name="")+
scale_fill_manual(values=c("grey80","black","red"))+
scale_y_continuous(name="%")+
theme(axis.text.y = element_text(face='bold'),
legend.position ="top",
legend.title=element_blank())+
coord_flip()

Resources