Plot histogram in ggplot - r

I have this dataframe.
dput(EF_Lat_Am)
structure(list(V1 = structure(c(4L, 3L, 5L, 6L, 1L, 2L, 7L, 8L,
4L, 3L, 5L, 6L, 1L, 2L, 7L, 8L), .Label = c("Crop Agriculture",
"Mining", "Mixed Agriculture", "Other land use", "Pasture", "Tree crops",
"Urban", "Water"), class = "factor"), V2 = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Emission Factor", class = "factor"),
V3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2000", "2005"), class = "factor"),
V4 = c(77.0109486116396, 69.2454348145657, 73.684103657833,
71.0430911289891, 43.136201172115, 117.358146800995, 77.4653952935238,
89.0966064142874, 71.8286578413912, 67.9099357961953, 76.7438444998728,
67.4818461466729, 50.6468079101972, 117.799797611894, 78.7347377710757,
81.3020943196897)), .Names = c("V1", "V2", "V3", "V4"), row.names = c(NA,
16L), class = "data.frame")
As you can see, for the years 2000 and 2005 I have an emission factor value for each type of land use. I want to plot a histogram with the type of land use on the x axis and the emission factors on the y axis. In addition, for each land use I want the bars for the two years to be adjacent. I also want a legend showing which year (either 2000 or 2005) each bar corresponds to. Thanks for your help.

Here is the answer.
# Dodged bar chart of the emission factor (V4) for each land-use class (V1),
# with the bars of the two periods (V3) drawn side by side and a grey-scale
# legend titled "Period".
ggplot(EF_Lat_Am, aes(x = V1, y = V4, fill = V3)) +
  # stat = "identity" draws the V4 values as they are in the data frame.
  # The bar width is a constant, so it is passed as a parameter rather than
  # being mapped inside aes().
  geom_bar(stat = "identity", position = "dodge", width = 0.85) +
  labs(x = "", y = "EF (T/ha)") +
  scale_fill_grey(name = "Period") +
  scale_y_continuous(limits = c(0, 120)) +
  # The complete theme must come BEFORE the theme() tweaks: adding a complete
  # theme replaces every theme setting made earlier in the chain, so placing
  # theme_classic() last would silently discard the sizes set below.
  theme_classic(base_size = 20, base_family = "") +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 20),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20),
    axis.line = element_line(colour = "black")
  )

Related

Boxplot troubleshooting, adding another variable factor

I have constructed a nice looking boxplot in r for data looking at the production of methane under different incubation temperatures. The plot looks at the production of CH4 by the patch from which the sample was collected.
However, there is a temperature variable. Samples were split, with 50% incubated at 10°C and 50% at 26°C.
This is my current plot:
# Load the incubation rates and print a quick overview of the data.
# attach() is deliberately not used: the boxplot() call below receives the
# data frame through `data =`, so nothing needs to be on the search path and
# the columns cannot be masked by (or mask) other objects.
Methanogenesis_Data <- read.csv("CO2-CH4 Rates.csv")
summary(Methanogenesis_Data)
str(Methanogenesis_Data)

# One box per Patch; the two colours are recycled across the boxes.
boxplot(CH4rate ~ Patch, data = Methanogenesis_Data,
        xlab = "Patch",
        ylab = "CH4 µmol g-1 hr-1 ",
        col = c("lightblue", "firebrick1"),
        # "\n" keeps the two-line title of the original call.
        main = "CH4 Production After\nIncubation",
        frame.plot = FALSE)
This was my previous plot:
# Earlier attempt: one box for every Patch x Temperature combination.
boxplot(
  CH4rate ~ Patch + Temperature,
  data = Methanogenesis_Data,
  main = "CH4 Production After\nIncubation",
  xlab = "Patch",
  ylab = "CH4 µmol g-1 hr-1 ",
  col = c("lightblue", "firebrick1"),
  frame.plot = FALSE
)
Here is the data:
structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names =
c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
What I am attempting to do is have my current plot, but with boxes in the boxplot representing both warm and cold temperatures within the 3 Patch areas.
Boxplot of CH4 production by Patch inc. Temp <--- This is what I want to do!
Thank You for any assistance!!
You could try it using ggplot2:
library(tidyverse)
# Grouped boxplot: one pair of boxes per Patch, filled by Temperature.
Methanogenesis_Data %>%
  ggplot(aes(x = Patch, y = CH4rate, fill = Temperature)) +
  geom_boxplot() +
  # Colours are assigned to the Temperature levels in level order.
  scale_fill_manual(values = c("lightblue", "firebrick1")) +
  # drop = FALSE keeps unused Patch levels on the axis. FALSE is spelled out
  # because the shorthand F is an ordinary variable that can be reassigned.
  scale_x_discrete(drop = FALSE) +
  theme_minimal() +
  labs(y = "CH4 µmol g-1 hr-1", title = "CH4 Production After Incubation")
Or, if you so wish, try it with base-R:
# Base-R version. Temperature is the first term of the formula, so the boxes
# come in Temperature pairs, one pair per Patch. The default x axis is
# suppressed (xaxt = "n") and redrawn below with one label per pair.
boxplot(
  CH4rate ~ Temperature + Patch,
  data = Methanogenesis_Data,
  main = "CH4 Production After\nIncubation",
  xlab = "Patch",
  ylab = "CH4 µmol g-1 hr-1 ",
  col = c("lightblue", "firebrick1"),
  frame.plot = FALSE,
  xaxt = "n"
)
# Legend keyed by the same two fill colours used for the boxes.
legend(
  "topleft",
  legend = c("cold", "warm"),
  fill = c("lightblue", "firebrick1")
)
# Ticks sit midway between the two boxes of each pair (1-2, 3-4, 5-6).
axis(
  side = 1,
  at = c(1.5, 3.5, 5.5),
  labels = levels(Methanogenesis_Data$Patch)
)

Creating a box and whisker plot with ggplot() troubleshooting

UPDATED:
Data has now been updated to full chemistry values as opposed to mean values.
I am attempting to create a box and whisker plot in r, on a very small dataset. My data is not behaving itself or I am missing some glaringly obvious error.
This is the code I have for making said plot:
library(ggplot2)
# Load the incubation rates and print a quick overview of the data.
# attach() is deliberately not used: boxplot() receives the data frame via
# `data =`, so the columns do not have to be placed on the search path.
Methanogenesis_Data <- read.csv("CO2-CH4 Rates.csv")
summary(Methanogenesis_Data)
str(Methanogenesis_Data)

# One box for every Patch x Temperature combination.
boxplot(CH4rate ~ Patch + Temperature, data = Methanogenesis_Data,
        xlab = "Patch", ylab = "CH4 Production")

# Colour vector kept from the original script (not used by the call above).
cols <- c("red", "blue")
From this small dataset.
structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
The plot I get as output is good, however I would like the Variables on the X axis to simply display "Gravel" "Macrophytes" "Marginal" as opposed to each of those variables with Warm and Cold. Thanks for any assistance
THIS IS WHAT I AM TRYING TO ACHIEVE -----> Exact Boxplot I want to create
Following your update with an example graph :
I have also included the formatting for the legend position. If you want to edit the y axis label to include subscript I would suggest you read over this. I have included a placeholder title for relabelling.
test <- structure(list(Patch = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gravel", "Macrophytes",
"Marginal"), class = "factor"), Temperature = structure(c(2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("Cold",
"Warm"), class = "factor"), CH4rate = c(0.001262595, 0.00138508,
0.001675944, 0.001592354, 0.002169233, 0.001772964, 0.002156633,
0.002864403, 0.002301383, 0.002561042, 0.005189598, 0.004557227,
0.008484851, 0.006867866, 0.007438633, 0.005405327, 0.006381582,
0.008860084, 0.007615417, 0.007705906, 0.009198508, 0.00705233,
0.007943024, 0.008319768, 0.010362114, 0.007822153, 0.010339339,
0.009252302, 0.008249555, 0.008197657), CO2rate = c(0.002274825,
0.002484866, 0.003020209, 0.00289133, 0.003927232, 0.003219346,
0.003922613, 0.005217026, 0.00418674, 0.00466427, 0.009427322,
0.008236453, 0.015339532, 0.012494729, 0.013531303, 0.009839847,
0.011624428, 0.016136746, 0.0138831, 0.014051034, 0.016753211,
0.012780956, 0.01445912, 0.01515584, 0.01883252, 0.014249452,
0.018849478, 0.016863299, 0.015045964, 0.014941168)), .Names = c("Patch",
"Temperature", "CH4rate", "CO2rate"), class = "data.frame", row.names =
c(NA,
-30L))
Now I will create two data sets, one for each graph, just for simplicity. You could leave them combined and facet, but for formatting purposes this might be easier.
# Reshape the two rate columns into long form ONCE (column `id` holds the
# original column name, `value` holds the rate), then split the result into
# one data set per gas. pivot_longer() replaces the superseded gather().
rates_long <- test %>%
  pivot_longer(
    cols = c(CH4rate, CO2rate),
    names_to = "id",
    values_to = "value"
  )

# Rows of each subset keep the order they have in `test`.
CH4rate <- rates_long %>%
  filter(id == "CH4rate")
CO2rate <- rates_long %>%
  filter(id == "CO2rate")
First plot:
# Boxplot of the CH4 rate per patch type, split by incubation temperature.
ggplot(CH4rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # Re-level so that "Warm" is drawn (and listed in the legend) first.
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  theme(
    # Legend placed inside the panel, near the top-left corner.
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CH4rate") +
  scale_fill_manual(
    name = "",
    # Colours are named, like the labels, so the colour/level pairing does
    # not depend on the order of the factor levels above.
    values = c("Warm" = "orange", "Cold" = "light blue"),
    # \u00B0 is the degree sign (the original used the dot-above character).
    labels = c("Cold" = "Incubated at 10\u00B0C", "Warm" = "Incubated at 26\u00B0C")
  )
Second plot:
# Boxplot of the CO2 rate per patch type, split by incubation temperature.
ggplot(CO2rate) +
  geom_boxplot(
    mapping = aes(
      x = Patch,
      y = value,
      # Re-level so that "Warm" is drawn (and listed in the legend) first.
      fill = factor(Temperature, levels = c("Warm", "Cold"))
    )
  ) +
  theme(
    # Legend placed inside the panel, near the top-left corner.
    legend.position = c(0.15, 0.9),
    panel.background = element_rect(fill = "white", colour = "grey50")
  ) +
  labs(title = "Title of graph", x = "Patch Type", y = "CO2rate") +
  scale_fill_manual(
    name = "",
    # Colours are named, like the labels, so the colour/level pairing does
    # not depend on the order of the factor levels above.
    values = c("Warm" = "orange", "Cold" = "light blue"),
    # \u00B0 is the degree sign (the original used the dot-above character).
    labels = c("Cold" = "Incubated at 10\u00B0C", "Warm" = "Incubated at 26\u00B0C")
  )

geom_bar & multiple variables

I am having trouble getting my plots to work, I have multiple categorical variables by which I want to color by one, and facet by another. However, R keeps adding the "values" (I used melt) for the same variables together instead. It works when I only have one variable.
Here is my plot with one variable
Here is my plot with two variables, you can see the adding that is happening
simple dataframe
Here is my code:
library(reshape2)
library(ggplot2)
test2 <- structure(list(SampleID = c(12.19, 12.22, 13.1, 12.19, 12.22,
13.1, 12.19, 12.22, 13.1, 12.19, 12.22, 13.1), patient = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), type = structure(c(1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("L",
"T"), class = "factor"), timepoint = structure(c(1L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("1", "2"), class = "factor"),
Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "D", class = "factor"), variable = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I"), class = "factor"),
value = c(2L, 5L, 6L, 25L, 18L, 12L, 6L, 10L, 15L, 21L, 23L,
33L)), .Names = c("SampleID", "patient", "type", "timepoint",
"Group", "variable", "value"), row.names = c(NA, 12L), class = "data.frame")
# Dodged bar chart of value per variable, coloured by timepoint and split
# into one panel per sample type.
# Columns are referenced by bare name inside aes() and facet_wrap(): writing
# test2$column there hands ggplot the full, un-faceted vectors, so the
# aesthetics no longer line up with the rows that belong to each panel.
ggplot(test2, aes(x = variable, y = value, fill = timepoint)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("rosybrown1", "steelblue2", "gray")) +
  labs(
    x = "Category",
    y = "Count",
    # Outside aes() the data frame must be named explicitly.
    title = paste0(
      "Sample ", as.character(unique(test2$patient)),
      " - ", as.character(unique(test2$Group))
    )
  ) +
  facet_wrap(~type) +
  theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, size = 7)
  )
If I am understanding right, it looks like you just need to give the scales option to facet_wrap like so:
facet_wrap(~type, scales = "free_x")

Bar chart showing NA bar when there are no NA values

My visualisation is showing an NA bar chart despite the fact that I have imputed all NA values in my incomeLev column and explicitly removed all NA values from the mental health (which is in my stacked bar visualisation)
# Turn missing mental-health answers into an explicit "Missing" level.
brfss2013[["mentalHealth"]] <- forcats::fct_explicit_na(
  brfss2013[["mentalHealth"]],
  na_level = "Missing"
)
# Make sure the income bracket is a factor, then keep only the rows where
# it is present.
brfss2013[["incomeLev"]] <- as.factor(brfss2013[["incomeLev"]])
brfss2013 <- brfss2013[!is.na(brfss2013[["incomeLev"]]), , drop = FALSE]
# Stacked bar chart of the number of people per income bracket, split by
# mental-health rating, with each segment labelled by its share of the
# bracket.
brfss2013 %>%
  # count_inc: number of rows in each income bracket.
  add_count(incomeLev) %>%
  rename(count_inc = n) %>%
  # count_mentalHealth: rows per bracket x mental-health rating.
  count(incomeLev, mentalHealth, count_inc) %>%
  rename(count_mentalHealth = n) %>%
  mutate(percent = count_mentalHealth / count_inc) %>%
  # Put the brackets in ascending order. `levels` has to match the values
  # present in the data exactly: the data spells the fourth bracket "50-$75"
  # (no trailing "k"), and any value not listed in `levels` is turned into
  # NA by factor() - which is what produced the unexpected NA bar. `labels`
  # supplies the display text, adding the missing "k".
  mutate(incomeLev = factor(
    incomeLev,
    levels = c("0-$20k", "25-$35k", "35-$50k", "50-$75", ">$75k"),
    labels = c("0-$20k", "25-$35k", "35-$50k", "50-$75k", ">$75k")
  )) %>%
  ggplot(aes(x = incomeLev,
             y = count_mentalHealth,
             group = mentalHealth)) +
  xlab("Annual Income") + ylab("Number of People") +
  geom_bar(aes(fill = mentalHealth),
           stat = "identity", na.rm = TRUE) +
  # Using the scales package does the percent formatting for you
  geom_text(aes(label = scales::percent(percent)),
            position = position_stack(vjust = 0.5)) +
  theme_minimal()
Here is a sample of my data:
brfss2013<-structure(list(incomeLev = structure(c(5L, 1L, 1L, 5L, 4L, 1L,
1L, 4L, 1L, 3L), .Label = c(">$75k", "0-$20k", "25-$35k", "35-$50k",
"50-$75"), class = "factor"), healtheat = c(4.66, 1.68, 2.37,
1.85, 2.5, 3, 3.66, 4.27, 2.72, 1.72), X_age_g = structure(c(5L,
4L, 5L, 5L, 6L, 4L, 3L, 5L, 4L, 6L), .Label = c("Age 18 to 24",
"Age 25 to 34", "Age 35 to 44", "Age 45 to 54", "Age 55 to 64",
"Age 65 or older"), class = "factor"), employ1 = structure(c(7L,
1L, 1L, 7L, 7L, 1L, 1L, 7L, 7L, 5L), .Label = c("Employed for wages",
"Self-employed", "Out of work for 1 year or more", "Out of work for less than 1 year",
"A homemaker", "A student", "Retired", "Unable to work"), class = "factor"),
renthom1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L), .Label = c("Own", "Rent", "Other arrangement"), class = "factor"),
sex = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("Male",
"Female"), class = "factor"), physLev = structure(c(3L, 1L,
3L, 1L, 2L, 1L, 2L, 1L, 2L, 2L), .Label = c("0-200", "200-500",
"500-1000", "1000-2000", "2000-4000", "4000-10000", ">10000"
), class = "factor"), mentalHealth = structure(c(5L, 1L,
1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L), .Label = c("Excellent",
"Good", "Ok", "Bad", "Very Bad", "Missing"), class = "factor")), row.names = c(NA,
10L), class = "data.frame")

how can I add multiple pvalues to ggplot grouped boxplot

I have a boxplot and I would like to add p-values for 4 comparisons across two factors.
Here is the data set:
dput(CauloQ_datMannot)
structure(list(V1 = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L,
3L, 4L, 4L, 4L), .Label = c("B", "BF", "BFi ", "Bi"),
class = "factor"),
variable = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L), .Label = c("V2", "V3", "V4"), class = "factor"),
value = c(0.00051, 0.00055, 0.00056, 0.00074, 0.00079, 0.00083,
0.00093, 0.00082, 0.00073, 0.0011, 0.00113, 0.00098),
Location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Root", class = "factor"),
Bean = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Bean", class = "factor"), Fungi = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("M+",
"M-"), class = "factor"), Insect = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor")), .Names = c("V1", "variable", "value",
"Location", "Bean", "Fungi", "Insect"), row.names = c(NA, -12L
), class = "data.frame")
Here is my current graph:
# Boxplot of the qPCR value per Insect treatment, filled by Fungi treatment.
# All text-size settings are gathered in a single theme() call.
ggplot(CauloQ_datMannot, aes(x = Insect, y = value, fill = Fungi)) +
  geom_boxplot() +
  ggtitle("Caulobacter qPCR") +
  scale_x_discrete(labels = c("I+", "I-", "soil alone")) +
  guides(fill = guide_legend("Metarhizium")) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    strip.text.x = element_text(size = 14)
  )
I have installed ggpubr, and have read up on stat_compare_means, but can't figure out how to make comparisons involving the two factors. That is, I want 4 p-values:
M+/I+ vs M-/I+, and M+/I- vs M-/I-, and I+/M+ vs I-/M+, and I+/M- vs I-/M-
Any help is appreciated. thanks
>
Great. Now thanks to Jimbou, I have the following plot.
# Combine Insect and Fungi into one `groups` column (keeping the originals,
# remove = FALSE) and compare every pair of groups with a t-test.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
d %>%
  unite(groups, Insect, Fungi, remove = FALSE) %>%
  {
    # Inside the braces `.` is the piped data frame, so it can be used both
    # as the plot data and to build the list of pairwise comparisons.
    ggplot(., aes(groups, value, fill = Fungi)) +
      geom_boxplot() + # ggbeeswarm::geom_beeswarm()+
      ggsignif::geom_signif(
        comparisons = combn(sort(unique(.$groups)), 2, simplify = FALSE),
        step_increase = 0.1,
        test = "t.test"
      )
  }
However, I would like to re-order the boxes, i.e. with all I+ ones first (M+ first within that). I tried re-ordering the levels and then manually re-ordering the rows, but neither worked.
Any help appreciated
# Rebuild each factor with its first two levels swapped (the current second
# level becomes the first and vice versa).
# NOTE(review): the plot above puts the united `groups` column on the x
# axis, not these factors directly - confirm whether the new level order is
# expected to carry over to `groups`.
d$Insect<-factor(d$Insect,levels(d$Insect)[c(2,1)])
d$Fungi<-factor(d$Fungi,levels(d$Fungi)[c(2,1)])
I recommend to use well defined groups on the x-axis. Then you can try
library(tidyverse)
library(ggsignif)
library(ggbeeswarm)
# Combine Insect and Fungi into one well-defined `groups` column (keeping
# the originals, remove = FALSE), then draw the boxes, overlay the raw
# points and annotate every pairwise comparison between groups.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
d %>%
  unite(groups, Insect, Fungi, remove = FALSE) %>%
  {
    # Inside the braces `.` is the piped data frame, so it can be used both
    # as the plot data and to build the list of pairwise comparisons.
    ggplot(., aes(groups, value, fill = Fungi)) +
      geom_boxplot() +
      ggbeeswarm::geom_beeswarm() +
      ggsignif::geom_signif(
        comparisons = combn(sort(unique(.$groups)), 2, simplify = FALSE),
        step_increase = 0.1
      )
  }

Resources