Stack Points in ggplot - r

I am making a dot plot using ggplot with the code and data below, which produce the following graph.
ggplot(data=holder, aes(x=Coef, y=CoefShort, colour=factor(Name))) + geom_point() + labs(x="Value", y="Coefficient") + scale_colour_discrete("Model")
There is a significant amount of overplotting and I would like to create some vertical separation between dots. But it has to be systematic, i.e. the order is always red-green-blue.
geom_stack only seems to work when the x value is the same, not when the y value is the same. I tried using the opposite axes (swapping the x and y), doing a position_dodge() then doing a coord_flip(). This only works with bars, as seen below.
ggplot(data=holder, aes(y=Coef, x=CoefShort, fill=factor(Name))) + geom_bar(position="dodge", aes(ymax=Coef)) + labs(y="Value", x="Coefficient") + scale_fill_discrete("Model") + coord_flip()
Any ideas how to achieve that stacking with geom_point()? Thanks.
The data:
structure(list(Coef = c(-3875.46969970703, 7871.08213392282,
1120.33185255098, 1510.13540851347, 1439.07714113149, 1800.92398445336,
-3760.05411752962, 8183.74295221482, 1126.98290537184, 1517.99524139857,
1442.73063836897, 1808.03721179571, -90.6507661872817, -71.7225864185226,
-103.615416254984, -732.167583256825, -1075.67574987664, -1908.56266462926,
-7362.80215630299, 8886.12888250011, 655.767448263926, 848.716877683527,
869.395903077767, 998.254438325812, -211.682481369473, -303.310032581644,
-506.1995360406, -978.697664841985, -1440.30190190734, -2325.22236024601,
2625.94998656519, 3573.68798735398, 4217.82910198788, 4534.8789695778,
4967.19941000705, 5072.02764498623, 5419.64684461491), Name = c(1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), CoefShort = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L), .Label = c("(Intercept)",
"carat", "Good", "Very Good", "Premium", "Ideal", "E", "F", "G",
"H", "I", "J", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"
), class = "factor")), .Names = c("Coef", "Name", "CoefShort"
), class = "data.frame", row.names = c(NA, -37L))

Points drawn by geom_point have zero width, so you need to set the width of dodging explicitly:
# Dot plot of coefficient values with one colour per model. The points are
# dodged along the discrete CoefShort axis with an explicit width, and the
# axes are flipped afterwards so the coefficients run down the page.
ggplot(
  holder,
  aes(x = CoefShort, y = Coef, colour = factor(Name))
) +
  geom_point(position = position_dodge(width = 0.8)) +
  labs(x = "Coefficient", y = "Value", colour = "Model") +
  coord_flip()

Related

how to show average, median, and stdv in a histogram

How would I go about showing in these histograms the average, median, and standard deviation of the data.
Here is my histogram code:
hist(PRE$Productivity...Productivité, main = "PRE", xlab = "Productivity")
hist(DBN$Productivity...Productivité, main = "DBN", xlab = "Productivity")
hist(DBG$Productivity...Productivité, main = "DBG", xlab = "Productivity")
hist(POST$Productivity...Productivité, main = "POST", xlab = "Productivity")
And here is its output
dput(head(DBN))
structure(list(Participant.Code = c("AE1_02", "AE1_02", "AE1_02",
"AE1_02", "AE2_08", "AE2_08"), Condition = structure(c(5L, 5L,
5L, 5L, 5L, 5L), levels = c("", "DBG", "DBG DBN", "DBG POST",
"DBN", "DBN DBG", "DBN POST", "POST", "PRE", "PRE DBG", "PRE DBN"
), class = "factor"), Start.time = c("3-9-22 8:39:27", "3-9-22 16:27:44",
"3-10-22 8:48:34", "3-10-22 16:09:33", "3-18-22 8:36:15", "3-18-22 17:26:13"
), Stiffness...Raideur = c(7L, 7L, 7L, 7L, 4L, 4L), Fatigue...Fatigue = c(7L,
8L, 8L, 8L, 4L, 6L), Discomfort...Inconfort = c(7L, 7L, 7L, 7L,
3L, 6L), Happiness...Joie = c(8L, 8L, 8L, 8L, 6L, 5L), Productivity...Productivité = c(6L,
8L, 7L, 7L, 5L, 4L), Ability.to.concentrate...Capacité.de.se.concentrer = c(7L,
8L, 7L, 6L, 5L, 4L), Alertness...Vigilance = c(7L, 8L, 7L, 6L,
5L, 5L), Stress...Stress = c(6L, 8L, 7L, 6L, 5L, 5L), Back.Pain...Mal.de.dos = c(8L,
7L, 8L, 8L, 3L, 4L), Neck.Pain...Douleur.au.cou = c(5L, 4L, 7L,
7L, 3L, 4L), Head.Pain...Mal.de.tête = c(1L, 1L, 1L, 1L, 2L,
4L), Eye.Pain...Douleur.oculaire = c(1L, 1L, 1L, 1L, 3L, 4L)), row.names = c(17L,
18L, 21L, 22L, 57L, 58L), class = "data.frame")
You can use the function abline immediately after the histogram call to add a vertical line intersecting the x axis. In this case, I am creating a line to show the location of the mean in each dataset. Then, to show the value, you can add it directly as a label or put it into a legend. I am adding some padding to the ylim so the legend or label doesn't overlap with the title. Finally, to arrange the plots the way you want, you can prepare the panel using the function par():
# Simulated stand-ins for the real productivity columns.
n <- 10000
example_a <- rgamma(n, 5, 2)
example_b <- rnorm(n, 5, 2)
example_c <- rbeta(n, 5, 2)

# Height of the tallest bar of each histogram, computed without drawing
# anything, so that ylim can be padded to leave room for a legend or label.
max_a <- max(hist(example_a, plot = FALSE)$counts)
max_b <- max(hist(example_b, plot = FALSE)$counts)
max_c <- max(hist(example_c, plot = FALSE)$counts)

mean_a <- mean(example_a)
mean_b <- mean(example_b)
mean_c <- mean(example_c)

par(mfrow = c(2, 2)) # 2 x 2 panel layout (room for four plots)

# Panel 1: mean shown as a dashed line, value reported in a legend.
hist(example_a, main = "PRE", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_a + 200))
abline(v = mean_a, col = "darkred", lwd = 3, lty = 2)
legend("topright", legend = c("Mean", round(mean_a, 3)),
       lwd = c(3, NA), lty = c(2, NA), col = c("darkred", NA))

# Panel 2: mean shown as a dashed line, value written as a text label placed
# inside the padding above the tallest bar.
hist(example_b, main = "DBN", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_b + 200))
abline(v = mean_b, col = "forestgreen", lwd = 3, lty = 2)
text(x = mean_b - 2, y = max_b + 100, paste("Mean = ", round(mean_b, 3)))

# Panel 3: same as panel 1; the legend key uses the same colour as the line.
hist(example_c, main = "DBG", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_c + 200))
abline(v = mean_c, col = "purple4", lwd = 3, lty = 2)
legend("topleft", legend = c("Mean", round(mean_c, 3)),
       lwd = c(3, NA), lty = c(2, NA), col = c("purple4", NA))
And that gives us the following plots:

Space between categories on the y-axis

I'm plotting a graph of odds-ratios and confidence intervals, and would like to have space in between the different categories of variables on the y-axis, but I'm having trouble doing that.
Here is some code that I have tried
# Read the tab-separated odds-ratio table and give its columns fixed names.
final_matrix <- read.table("matrix", sep = "\t", header = TRUE)
thecolnames <- c("Coefficients", "CI2.5", "CI97.5", "Demographics", "Categories")
colnames(final_matrix) <- thecolnames
# Use the row order of the table as the order of the categories on the y axis.
final_matrix$Categories <- factor(final_matrix$Categories,
                                  levels = final_matrix$Categories)
# Columns are referenced by bare name inside aes() (no `final_matrix$`), and
# the lower bound (CI2.5) is mapped to xmin, the upper bound (CI97.5) to xmax.
ggplot(final_matrix,
       aes(x = Coefficients, y = Categories, color = Demographics)) +
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmin = CI2.5, xmax = CI97.5),
                 size = .5, height = .2, color = "gray50") +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  ylab("") +
  xlab("Odds ratio") +
  ggtitle("Association between Coefficients and Categories")
Final matrix looks like:
structure(list(Coefficients = c(1, 1.030507438, 1.044036099,
1, 0.9733293067, 1, 2.33127416, 2.402926091, 2.422669367, 2.395563322,
1, 1.005581977, 1.006818225, 1, 1.021092986, 1.019769848, 0.9455416249,
1.010520081, 1, 0.9912335776, 0.988674891, 1.006406292, 1, 1.002857188,
0.9963459983, 1.005356995, 1, 0.9990484426, 1, 1.025390984, 1,
1.017962091, 1, 0.9805242864), CI2.5 = c(1, 0.9913713797, 0.9998913413,
1, 0.9298311216, 1, 2.189771461, 2.268230465, 2.284251227, 2.257308777,
1, 0.9630167448, 0.9481349102, 1, 0.9800985942, 0.9841991871,
0.9056186125, 0.9576109398, 1, 0.9562607091, 0.9532485042, 0.9664533176,
1, 0.9477868449, 0.9509813538, 0.9519718969, 1, 0.9724519214,
1, 0.9631787377, 1, 0.9547918105, 1, 0.9186602875), CI97.5 = c(1,
1.071188458, 1.090129827, 1, 1.018862369, 1, 2.481920741, 2.545620424,
2.569475192, 2.542285613, 1, 1.050028588, 1.069133651, 1, 1.063802043,
1.056626094, 0.9872245912, 1.066352515, 1, 1.027485492, 1.025417859,
1.048010913, 1, 1.061127346, 1.043874672, 1.061735847, 1, 1.026372378,
1, 1.091621553, 1, 1.0853118, 1, 1.046554302), Demographics = structure(c(5L,
5L, 5L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L), .Label = c("Age", "Disease", "Education", "Employment", "Financial",
"Health", "Insurance", "Race"), class = "factor"), Categories = structure(1:34, .Label = c("Extremely Difficult",
"Somewhat Difficult", "Not Difficult", "No Disease", "Disease",
"Overall Health (<=3)", "Overall Health (4)", "Overall Health (5)",
"Overall Health (6)", "Overall Health (7)", "18-29", "30-64",
"65+", "Full-Time", "Part-Time", "Unemployed/Homemaker ", "Disability",
"Retired", "Less than HS", "HS Grad", "Some College / Vocational",
"College Degree / PhD", "Private", "Other Public", "Medicaid",
"Medicare", "Non-Hispanic", "Hispanic", "Non-White", "White ",
"Non-Black", "Black", "Non-Other", "Other"), class = "factor")), row.names = c(NA,
-34L), class = "data.frame")
What I get is something like
a --o---
b -o-
c --o--
d -o-
whereas, a and b might be part of one "demographic" category, and c and d might be a part of another "demographic category". Although I can get them to be different colors, I'm struggling to space them into something like:
Demographic 1
a --o---
b -o-
Demographic 2
c --o--
d -o-
I am not sure whether this is what you are after; try running it and let me know in a comment if it is what you want.
final_matrix<-structure(list(Coefficients = c(1, 1.030507438, 1.044036099, 1, 0.9733293067, 1, 2.33127416, 2.402926091, 2.422669367, 2.395563322, 1, 1.005581977, 1.006818225, 1, 1.021092986, 1.019769848, 0.9455416249, 1.010520081, 1, 0.9912335776, 0.988674891, 1.006406292, 1, 1.002857188, 0.9963459983, 1.005356995, 1, 0.9990484426, 1, 1.025390984, 1, 1.017962091, 1, 0.9805242864), CI2.5 = c(1, 0.9913713797, 0.9998913413, 1, 0.9298311216, 1, 2.189771461, 2.268230465, 2.284251227, 2.257308777, 1, 0.9630167448, 0.9481349102, 1, 0.9800985942, 0.9841991871, 0.9056186125, 0.9576109398, 1, 0.9562607091, 0.9532485042, 0.9664533176, 1, 0.9477868449, 0.9509813538, 0.9519718969, 1, 0.9724519214, 1, 0.9631787377, 1, 0.9547918105, 1, 0.9186602875), CI97.5 = c(1, 1.071188458, 1.090129827, 1, 1.018862369, 1, 2.481920741, 2.545620424, 2.569475192, 2.542285613, 1, 1.050028588, 1.069133651, 1, 1.063802043, 1.056626094, 0.9872245912, 1.066352515, 1, 1.027485492, 1.025417859, 1.048010913, 1, 1.061127346, 1.043874672, 1.061735847, 1, 1.026372378, 1, 1.091621553, 1, 1.0853118, 1, 1.046554302),
Demographics = structure(c(5L, 5L, 5L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L),
.Label = c("Age", "Disease", "Education", "Employment", "Financial", "Health", "Insurance", "Race"), class = "factor"),
Categories = structure(1:34,
.Label = c("Extremely Difficult", "Somewhat Difficult", "Not Difficult", "No Disease", "Disease", "Overall Health (<=3)", "Overall Health (4)", "Overall Health (5)", "Overall Health (6)", "Overall Health (7)", "18-29", "30-64", "65+", "Full-Time", "Part-Time", "Unemployed/Homemaker ", "Disability", "Retired", "Less than HS", "HS Grad", "Some College / Vocational", "College Degree / PhD", "Private", "Other Public", "Medicaid", "Medicare", "Non-Hispanic", "Hispanic", "Non-White", "White ", "Non-Black", "Black", "Non-Other", "Other"), class = "factor")),
row.names = c(NA, -34L), class = "data.frame")
final_matrix<-data.frame(final_matrix)
# One panel per demographic group, stacked in a single column, with both axes
# free per panel. Columns are referenced by bare name inside aes():
# `final_matrix$...` passes a vector that is detached from the layer data,
# which is not safe once the data are split into facets. The lower bound
# (CI2.5) is mapped to xmin and the upper bound (CI97.5) to xmax.
ggplot(final_matrix,
       aes(x = Coefficients, y = Categories,
           color = Demographics, group = Categories)) +
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmin = CI2.5, xmax = CI97.5),
                 size = .5, height = .2, color = "gray50") +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  ylab("") +
  xlab("Odds ratio") +
  ggtitle("Association between Coefficients and Categories") +
  facet_wrap(Demographics ~ ., scales = "free", ncol = 1)
# Same plot with only the y axis free per panel, so every demographic group
# shares one odds-ratio axis. The lower bound (CI2.5) is mapped to xmin and
# the upper bound (CI97.5) to xmax.
ggplot(final_matrix,
       aes(x = Coefficients, y = Categories, color = Demographics)) +
  geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
  geom_errorbarh(aes(xmin = CI2.5, xmax = CI97.5),
                 size = .5, height = .2, color = "gray50") +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  labs(y = "", x = "Odds ratio",
       title = "Association between Coefficients and Categories") +
  facet_wrap(Demographics ~ ., scales = "free_y", ncol = 1)

How to add geom_line to stacked barplot in r

Below is my code. I am trying to add one line (data from a different csv file) on top of a stacked barplot; however, it won't work: the error says "object variable not found". Without the geom_line the stacked barplot works, so I assume it is the line that is creating the issue. Any ideas on how I can fix this?
# Question script, kept as posted: a stacked bar chart built from the melted
# contents of data.csv, with a line from data1.csv drawn on top of it.
a <- read.csv("data.csv", header=TRUE, sep=",")
line1 <- read.csv("data1.csv", header=TRUE, sep=",")
line2 <- data.frame(line1)
library(reshape2)
# NOTE(review): the variable name `c` is the same as base R's c() function.
c <- melt(a, id.var="day")
library(ggplot2)
# `a` is reassigned here: it held the data frame read above and now holds the
# plot object.
# fill=variable sits in the top-level aes(), so every layer inherits it,
# including geom_line(), which is given its own data (line2).
# NOTE(review): presumably line2 has no `variable` column, which would explain
# the reported "object variable not found" error -- confirm against data1.csv.
a <- ggplot(c, aes(x=day, y=value, fill=variable)) +
geom_bar(stat="identity", aes(x=day, y=value), width=0.7) +
geom_line(data=line2, aes(x=day, y=value), color="black", stat="identity")
# NOTE(review): the line above is already a complete expression, so the `+`
# below starts a new statement instead of continuing the plot.
+
scale_fill_manual(values = c("black", "grey47", "grey")) +
# NOTE(review): `m` is not defined anywhere in the visible script.
scale_x_continuous(breaks = round(seq(min(m$day), max(m$day), by = 1),0))
print(a)
The following is a complete code example to produce the graph below.
I have changed your variables' names, in order to make them more consistent. You had named both the data.frame in file "data.csv" and the result of your ggplot instruction a.
library(reshape2)
library(ggplot2)
# Read the wide bar data and the separate line data.
a <- read.csv("~/data.csv")
line1 <- read.csv("~/data2.csv")

# Long format: one row per day and measured variable.
long <- melt(a, id.var = "day")

# Only x and y are mapped at plot level. `fill` is mapped inside the bar
# layer, so the line layer, which uses its own data frame, does not look for
# a `variable` column.
g <- ggplot(long, aes(x = day, y = value)) +
  geom_bar(
    aes(x = day, y = value, fill = variable),
    stat = "identity",
    width = 0.7
  ) +
  geom_line(
    data = line1,
    aes(x = day, y = value),
    color = "black"
  ) +
  scale_fill_manual(values = c("black", "grey47", "grey")) +
  scale_x_continuous(breaks = min(long$day):max(long$day))

print(g)
Data in dput format.
a <-
structure(list(day = 1:31, emigration = c(6L, 6L, 6L, 6L, 5L,
3L, 1L, 9L, 8L, 7L, 6L, 4L, 3L, 1L, 2L, 4L, 5L, 6L, 8L, 7L, 5L,
4L, 1L, 2L, 4L, 9L, 8L, 7L, 6L, 4L, 3L), security = c(5L, 5L,
5L, 5L, 6L, 6L, 8L, 9L, 9L, 9L, 8L, 8L, 5L, 7L, 7L, 6L, 5L, 5L,
4L, 3L, 2L, 2L, 2L, 2L, 4L, 9L, 7L, 6L, 4L, 3L, 2L), checkin = c(4,
6, 9, 1, 3, 5, 7, 9, 8, 6, 4, 2, 1, 3, 4, 5, 6, 7, 8, 8, 2, 1,
2, 3, 4, 5, 7, 8, 9, 1, 1)), class = "data.frame",
row.names = c(NA, -31L))
line1 <-
structure(list(day = 1:31, value = c(12, 11, 10, 8, 7, 6, 6,
6, 7, 8, 14, 6, 6, 6, 8, 8, 10, 10, 12, 12, 12, 13, 13, 14, 15,
15, 10, 10, 10, 10, 12)), class = "data.frame",
row.names = c(NA, -31L))
Based on your comments of your data structure, I suppose it might help joining your dataframes first and then building the plot using one dataset. You can try:
library(dplyr)
c <- c %>%
left_join(line2 %>%
rename(value_line2 = value),
by="day")
Then adjust geom_line():
geom_line(data=c, aes(x=day, y=value_line2), color="black", stat="identity")
This might help. Please tell me if joining the data doesn't work as intended.
In case it wasn't clear, this is what I meant in my comment above:
library(ggplot2)
a <- ggplot(c, aes(x=day, y=value)) +
geom_bar(stat="identity", aes(x=day, y=value, fill=variable), width=0.7) +
geom_line(data=line2, aes(x=day, y=value), color="black", stat="identity")

Order x axis after factor variable with fewer levels

I have the following data frame:
structure(list(Substance = c("Cefotaxime", "Cefepim", "Chloramphenicol",
"Sulfamethoxazole", "Ampicillin", "Ampicillin", "Tetracycline",
"Cefotaxime", "Trimethoprim", "Cefepim", "Cefepim", "Sulfamethoxazole",
"Ceftazidime", "Nalidixic acid", "Cefepim", "Ceftazidime", "Ampicillin",
"Ceftazidime", "Cefotaxime", "Ceftazidime"), Species = c("Cattle",
"Chicken", "Cattle", "Cattle", "Cattle", "Cattle", "Cattle",
"Pig", "Cattle", "Cattle", "Horse", "Horse", "Pig", "Cattle",
"Pig", "Pig", "Cattle", "Cattle", "Pig", "Horse"), gene = c("AmpC",
"blaCMY-2", "blaSHV-12", "blaCMY-2", "AmpC", "blaCMY-2", "blaCMY-2",
"AmpC", "blaSHV-12", "blaSHV-12", "blaCTX-M Group 1", "blaCTX-M Group 1",
"AmpC", "blaSHV-12", "blaCTX-M-15", "blaCTX-M-15", "AmpC", "AmpC",
"blaCMY-2", "AmpC"), n = c(3, 6, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1,
3, 1, 1, 1, 1, 1, 1, 1), group = c(8L, 3L, 5L, 9L, 8L, 9L, 9L,
13L, 5L, 5L, 2L, 2L, 13L, 5L, 16L, 16L, 7L, 7L, 15L, 1L), value = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L), .Names = c("Substance", "Species", "gene", "n", "group",
"value"))
And I have this plot:
gene_palette <- c("AmpC" = "#b2182b",
"blaCMY-2" = "#ef8a62",
"blaCTX-M-15" = "#fddbc7",
"blaCTX-M Group 1" = "#d1e5f0",
"blaSHV-12" = "#67a9cf",
"ESBL" = "#2166ac")
library(ggplot2)
# One filled point per group/substance pair, coloured by gene.
# theme_classic() is a complete theme and replaces every earlier theme
# setting, so it has to come before the axis-text tweak, not after it.
ggplot(test, aes(factor(group), Substance, fill = gene)) +
  geom_point(pch = 21, size = 5) +
  scale_fill_manual(values = gene_palette) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
This produces the following plot:
Is there a way to sort the order of "groups" on the x-axis so that the groups belonging to each gene are next to each other, in the order listed in the legend? This way, the points for each gene type (and therefore each color) will sit next to each other.
I tried the solutions presented here, but since "gene" has fewer factor levels than "group" and they don't match, that didn't work (it introduced NAs).
# Order the x axis by gene first and by group number within each gene: sort
# the rows by (gene, group) and use the groups in order of first appearance
# as the factor levels.
# theme_classic() is a complete theme and replaces every earlier theme
# setting, so it has to come before the axis-text tweak, not after it.
ggplot(test,
       aes(factor(group,
                  levels = unique(test$group[order(test$gene, test$group)])),
           Substance, fill = gene)) +
  geom_point(pch = 21, size = 5) +
  scale_fill_manual(values = gene_palette) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Add text to plot with facetted bar chart

My question is related to this question. I want "2014" in the 4-year facet. I tried to repeat but my code doesn't give what I want.
Annotating text on individual facet in ggplot2
This is my data
structure(list(Rot = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("2-year",
"3-year", "4-year"), class = "factor"), Rot.Herb = structure(c(3L,
3L, 4L, 4L, 13L, 13L, 14L, 14L, 5L, 5L, 6L, 6L, 9L, 9L, 10L,
10L, 15L, 15L, 16L, 16L, 1L, 1L, 2L, 2L, 7L, 7L, 8L, 8L, 11L,
11L, 12L, 12L, 17L, 17L, 18L, 18L), .Label = c("A4-conv", "A4-low",
"C2-conv", "C2-low", "C3-conv", "C3-low", "C4-conv", "C4-low",
"O3-conv", "O3-low", "O4-conv", "O4-low", "S2-conv", "S2-low",
"S3-conv", "S3-low", "S4-conv", "S4-low"), class = "factor"),
variable = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Diversity",
"Evenness"), class = "factor"), N = c(4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(0.78537789925, 0.613408315,
1.305194686, 0.79519430975, 0.4481728555, 0.30608817425,
1.20978861475, 0.8580643725, 0.92387324875, 0.630166121,
0.945954185, 0.561172324, 1.43952456275, 0.8616864655, 1.23679146725,
0.831737624, 1.033474108, 0.80689293925, 0.9910142125, 0.79342098075,
1.175512223, 0.6293940245, 0.981614832, 0.62342189825, 1.351710013,
0.805075937, 1.6598348325, 0.7983622545, 1.01606920875, 0.5751418795,
1.0500365255, 0.56408326225, 1.07162937725, 0.6756859865,
0.45699816625, 0.44444147325), sd = c(0.354077266902404,
0.208934910331856, 0.169501822767995, 0.0774319459391732,
0.737366460962239, 0.40697977697835, 0.494107033311986, 0.11906912863268,
0.491492768082854, 0.34236657107712, 0.219739438843007, 0.205905593411204,
0.319301583035043, 0.0696484379979274, 0.0563293598951725,
0.0978700910274188, 0.446850757364563, 0.175073468716825,
0.426859848850874, 0.180469101499932, 0.526842123835502,
0.200470277385505, 0.574885944755375, 0.27189545397305, 0.39621771945215,
0.150798258847229, 0.275863362594154, 0.111178397407429,
0.254811233135664, 0.158920851982914, 0.198698241334475,
0.0730606635175717, 0.717706309307313, 0.453776579066358,
0.574276936403411, 0.513758415496589), se = c(0.177038633451202,
0.104467455165928, 0.0847509113839974, 0.0387159729695866,
0.368683230481119, 0.203489888489175, 0.247053516655993,
0.0595345643163399, 0.245746384041427, 0.17118328553856,
0.109869719421504, 0.102952796705602, 0.159650791517521,
0.0348242189989637, 0.0281646799475863, 0.0489350455137094,
0.223425378682282, 0.0875367343584126, 0.213429924425437,
0.090234550749966, 0.263421061917751, 0.100235138692753,
0.287442972377688, 0.135947726986525, 0.198108859726075,
0.0753991294236146, 0.137931681297077, 0.0555891987037145,
0.127405616567832, 0.0794604259914568, 0.0993491206672376,
0.0365303317587859, 0.358853154653656, 0.226888289533179,
0.287138468201705, 0.256879207748294), ci = c(0.563415944919255,
0.332462066715199, 0.26971522480343, 0.123211505132525, 1.1733145846647,
0.647595643784969, 0.786234551289211, 0.189465554245211,
0.782074671929471, 0.544781614588516, 0.349654482635521,
0.327641747494367, 0.508080071600555, 0.110826207087643,
0.089632581638694, 0.155733154793995, 0.71103927089404, 0.278580956835532,
0.679229274424713, 0.287166612643164, 0.838323385234058,
0.318992946792351, 0.914771825423139, 0.432646341459985,
0.630470808679215, 0.23995368085579, 0.438960169525453, 0.176909640028318,
0.40546153371869, 0.252878539112781, 0.316173242000635, 0.116255819336536,
1.14203089616693, 0.722059798737006, 0.91380275723334, 0.817504285602766
)), .Names = c("Rot", "Rot.Herb", "variable", "N", "value",
"sd", "se", "ci"), row.names = c(NA, -36L), class = "data.frame")
and the code to graph
# Dodged bars of `value` per treatment, filled by `variable`, with error bars
# of value +/- se, in one facet per rotation (free x scale and panel width).
p <- ggplot(Shannon.long2,
            aes(x = Rot.Herb, y = value, fill = factor(variable))) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                size = 0.5, width = .25, position = position_dodge(.9)) +
  scale_fill_brewer(palette = "Set1") +
  facet_grid(~Rot, scales = "free_x", space = "free_x") +
  labs(x = "\nTreatment", y = "Shannon's H' and E'") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = 20),
    legend.position = "top",
    axis.title = element_text(size = 24, face = "bold", vjust = 2),
    axis.text.x = element_text(size = 20, angle = 90, hjust = 1),
    axis.text.y = element_text(size = 20, color = "black"),
    strip.text.x = element_text(colour = "black", size = 20),
    strip.background = element_rect(fill = "white")
  )
produced graph (please don't mind the "2014" on the y-axis).
New code to annotate 2014, with help from eipi10
ann_text <- data.frame(x = "S4-conv",y = 1.75,lab = "2014", Rot.Herb=NA,
value=NA, variable=NA,
N=NA, sd=NA, se=NA, ci=NA,
Rot = factor("4-year",levels = c("2-year","3-year","4-year")))
I got the error "Error: Discrete value supplied to continuous scale" after I ran p + geom_text(data = ann_text,label = "2014"). Please help me see what is wrong with my code and data format. Thanks.
It turns out the issue is that when you include value=NA in ann_text it gets interpreted as logical (rather than numeric, which is its mode in Shannon.long2), causing the error because ggplot expects a numeric variable rather than a categorical one. Set value=NA_real_ (in addition to NA, R has class-specific missing value constants; see ?NA for more info) in ann_text to ensure value is interpreted as numeric and resolve the error. Or set value to any number, e.g., value=0.
In the example below, I've removed all of the theme and lab statements to shorten the code down to the essentials:
# Base plot, stripped down to the essentials.
p <- ggplot(Shannon.long2,
            aes(x = Rot.Herb, y = value, fill = factor(variable))) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                size = 0.5, width = .25, position = position_dodge(.9)) +
  facet_grid(~Rot, scales = "free_x", space = "free_x")

# One-row data frame for the label.
# - value = NA_real_ keeps `value` numeric (a bare NA would be logical).
# - variable is present because the plot-level fill mapping refers to it.
# - Rot pins the label to the "4-year" facet; a layer whose data lacks the
#   faceting variable is drawn in every panel.
ann_text <- data.frame(x = "S4-conv", y = 1.75, lab = "2014", Rot.Herb = NA,
                       value = NA_real_, variable = NA,
                       Rot = factor("4-year",
                                    levels = c("2-year", "3-year", "4-year")))
p + geom_text(data = ann_text, aes(x = x, y = y, label = lab))
Note that you also need to feed x and y values to geom_text to provide the label location.
Another option would be to just use the same x and y variable names as in your original data frame, since ggplot already knows these names and has scaled the graph based on them. Now the only missing column we need to add is variable:
# Label data that reuses the plot's own x/y column names (Rot.Herb, value).
# variable is present because the plot-level fill mapping refers to it.
# Rot is included as well so the label is drawn only in the "4-year" facet;
# without the faceting variable the layer would be repeated in every panel.
ann_text <- data.frame(Rot.Herb = "S4-conv", value = 1.75, lab = "2014",
                       variable = NA,
                       Rot = factor("4-year",
                                    levels = c("2-year", "3-year", "4-year")))
p + geom_text(data = ann_text, aes(x = Rot.Herb, y = value, label = lab))

Resources