geom_bar & multiple variables - r

I am having trouble getting my plots to work. I have multiple categorical variables: I want to color the bars by one of them and facet by another. However, R keeps adding together the "values" (I used melt) that belong to the same variable instead of keeping them separate. It works when I only have one variable.
Here is my plot with one variable
Here is my plot with two variables, you can see the adding that is happening
simple dataframe
Here is my code:
library(reshape2)
library(ggplot2)
test2 <- structure(list(SampleID = c(12.19, 12.22, 13.1, 12.19, 12.22,
13.1, 12.19, 12.22, 13.1, 12.19, 12.22, 13.1), patient = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), type = structure(c(1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("L",
"T"), class = "factor"), timepoint = structure(c(1L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("1", "2"), class = "factor"),
Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "D", class = "factor"), variable = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I"), class = "factor"),
value = c(2L, 5L, 6L, 25L, 18L, 12L, 6L, 10L, 15L, 21L, 23L,
33L)), .Names = c("SampleID", "patient", "type", "timepoint",
"Group", "variable", "value"), row.names = c(NA, 12L), class = "data.frame")
# Dodged bar chart of `value` per `variable`, coloured by `timepoint`, with
# one panel per sample `type`.
# Columns are referenced by bare name inside aes() and facet_wrap(). Writing
# test2$variable there makes ggplot2 evaluate the complete original vector
# instead of the rows assigned to each panel, so values can end up in the
# wrong facet.
ggplot(test2, aes(x = variable, y = value, fill = timepoint)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("rosybrown1", "steelblue2", "gray")) +
  labs(
    x = "Category",
    y = "Count",
    # The title is assembled from the patient and Group values in the data.
    title = paste0(
      "Sample ", as.character(unique(test2$patient)),
      " - ", as.character(unique(test2$Group))
    )
  ) +
  facet_wrap(~type) +
  theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, size = 7)
  )

If I am understanding right, it looks like you just need to give the scales option to facet_wrap like so:
# Facet by `type` and let every panel keep its own x-axis scale.
facet_wrap(facets = ~type, scales = "free_x")

Related

Percentages in the wrong position in ggplot2

I'm trying to plot a graph for a Likert test using ggplot2, and I would like the percentage values to appear on the graph. I've created a data frame with all the averages and percentages so that I can write them on the graph. It all seems to be working well, except that the values are being plotted as if they were upside down or mirrored.
This is the code I'm using
example <- structure(list(grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"), tipo = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L), .Label = c("COR", "PAD", "RES"), class = "factor"),
likert = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L), .Label = c("0",
"1", "2", "3"), class = c("ordered", "factor")), cnt = c(3L,
1L, 3L, 5L, 3L, 1L, 3L, 6L, 2L, 1L, 10L, 5L, 5L, 9L, 11L,
6L, 4L, 10L, 10L, 10L), freq = c(0.25, 0.083, 0.25, 0.417,
0.231, 0.077, 0.231, 0.462, 0.154, 0.077, 0.769, 0.167, 0.167,
0.3, 0.367, 0.2, 0.133, 0.333, 0.333, 0.333), prop = c(25,
8.3, 25, 41.7, 23.1, 7.7, 23.1, 46.2, 15.4, 7.7, 76.9, 16.7,
16.7, 30, 36.7, 20, 13.3, 33.3, 33.3, 33.3), proptext = c("25",
"8.3", "25", "41.7", "23.1", "7.7", "23.1", "46.2", "15.4",
"7.7", "76.9", "16.7", "16.7", "30", "36.7", "20", "13.3",
"33.3", "33.3", "33.3")), row.names = c(NA, -20L), groups = structure(list(
grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("EJA",
"REG"), class = "factor"), nivel = structure(c(1L, 1L, 1L,
2L, 2L, 2L), .Label = c("CINCO", "DOZE", "NOVE"), class = "factor"),
tipo = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("COR",
"PAD", "RES"), class = "factor"), .rows = structure(list(
1:4, 5:8, 9:11, 12:15, 16:19, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Horizontal stacked bars of `prop` for every grupo/nivel/tipo combination,
# filled by `likert`, with `proptext` drawn as a text label for each row.
# NOTE(review): geom_text() is left at its default position here, so each
# label is drawn at its raw `prop` value rather than inside the stacked bar
# segment it belongs to.
ggplot(
  example,
  aes(x = interaction(grupo, nivel, tipo), y = prop, fill = likert)
) +
  geom_col() +
  # scale_y_continuous(labels = percent) +
  coord_flip() +
  labs(title = "Testing", x = "A, B, and C", y = "%") +
  geom_text(aes(label = proptext), size = 2, colour = "black")
Does anyone have an idea of how I could solve this?
The geom_text may also require the x, y
library(dplyr)
library(tidyr)
library(ggplot2)
# Collapse the three grouping columns into a single label column `new`, then
# draw horizontal stacked bars with every percentage label centred inside
# its own segment (position_stack(vjust = 0.5)).
example %>%
  unite(new, grupo, nivel, tipo, sep = ".") %>%
  ggplot(aes(x = new, y = prop, fill = likert)) +
  geom_col() +
  geom_text(
    aes(label = proptext),
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  # scale_y_continuous(labels = percent) +
  labs(title = "Testing", x = "A, B, and C", y = "%")
-output

how can I add multiple pvalues to ggplot grouped boxplot

I have a boxplot and I would like to add p-values for 4 comparisons across two factors.
Here is the data set:
dput(CauloQ_datMannot)
structure(list(V1 = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
3L, 3L,
3L, 4L, 4L, 4L), .Label = c("B", "BF", "BFi ", "Bi"),
class = "factor"),
variable = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L), .Label = c("V2", "V3", "V4"), class = "factor"),
value = c(0.00051, 0.00055, 0.00056, 0.00074, 0.00079, 0.00083,
0.00093, 0.00082, 0.00073, 0.0011, 0.00113, 0.00098),
Location = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Root", class = "factor"),
Bean = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Bean", class = "factor"), Fungi = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("M+",
"M-"), class = "factor"), Insect = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Insect",
"NI"), class = "factor")), .Names = c("V1", "variable", "value",
"Location", "Bean", "Fungi", "Insect"), row.names = c(NA, -12L
), class = "data.frame")
Here is my current graph:
# Boxplot of qPCR `value` per Insect treatment, with one box per Fungi level.
# The fill legend is titled "Metarhizium" and all text sizes are set in a
# single theme() call.
# NOTE(review): three x-axis labels are supplied, while the data shown with
# this question has only two Insect levels ("Insect", "NI") - confirm.
ggplot(CauloQ_datMannot, aes(x = Insect, y = value, fill = Fungi)) +
  geom_boxplot() +
  guides(fill = guide_legend("Metarhizium")) +
  ggtitle("Caulobacter qPCR") +
  scale_x_discrete(labels = c("I+", "I-", "soil alone")) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    strip.text.x = element_text(size = 14)
  )
I have installed ggpubr and have read up on stat_compare_means, but I can't figure out how to make comparisons involving the two factors. That is, I want 4 p-values:
M+/I+ vs M-/I+, and M+/I- vs M-/I-, and I+/M+ vs I-/M+, and I+/M- vs I-/M-
Any help is appreciated. thanks
>
Great. Now thanks to Jimbou, I have the following plot.
# Boxplot with one box per Insect/Fungi combination and a t-test significance
# bracket for every pair of combinations.
# TRUE/FALSE are spelled out because the shortcuts T and F are ordinary
# variables that can be reassigned.
d %>%
  unite(groups, Insect, Fungi, remove = FALSE) %>%
  {
    # Braces keep `.` available so the combined groups can be enumerated.
    ggplot(., aes(groups, value, fill = Fungi)) +
      geom_boxplot() + # ggbeeswarm::geom_beeswarm() +
      ggsignif::geom_signif(
        comparisons = combn(sort(unique(.$groups)), 2, simplify = FALSE),
        step_increase = 0.1,
        test = "t.test"
      )
  }
However, I would like to re-order the boxes, i.e. with all the I+ ones first (and M+ first within those). I tried re-ordering the factor levels and then manually re-ordering the rows, but neither worked.
Any help appreciated
# Rebuild both factors with their first two levels in swapped order.
# NOTE(review): the plot in this thread maps the united `groups` column to
# the x axis, so relevelling these two columns alone may not change the
# order of the boxes - confirm.
d$Insect <- factor(d$Insect, levels = levels(d$Insect)[2:1])
d$Fungi <- factor(d$Fungi, levels = levels(d$Fungi)[2:1])
I recommend using well-defined groups on the x-axis. Then you can try:
library(tidyverse)
library(ggsignif)
library(ggbeeswarm)
# Boxplot with one box per Insect/Fungi combination, the raw points overlaid
# as a beeswarm, and a significance bracket for every pair of combinations
# (geom_signif() uses its default test here).
# TRUE/FALSE are spelled out because the shortcuts T and F are ordinary
# variables that can be reassigned.
d %>%
  unite(groups, Insect, Fungi, remove = FALSE) %>%
  {
    # Braces keep `.` available so the combined groups can be enumerated.
    ggplot(., aes(groups, value, fill = Fungi)) +
      geom_boxplot() +
      ggbeeswarm::geom_beeswarm() +
      ggsignif::geom_signif(
        comparisons = combn(sort(unique(.$groups)), 2, simplify = FALSE),
        step_increase = 0.1
      )
  }

How can I generate and save multiple single graphs from a faceted graph at once?

I have a dataset of >100 different samples. Samples are from different genotypes (e.g. X, Y, Z) and 4 different time points (T0,1,2,3) with 3 biological replicates (R1,2,3). I'm measuring values for 50 different genes (in rows; A,B..)
longdata <- structure(list(Gene = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "B"), class = "factor"), Genotype = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("X", "Y", "Z"), class = "factor"),
Time = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("T0",
"T1", "T2", "T3"), class = "factor"), Ave = c(1.32606106633333,
1.499956424, 1.118528738, 1.025082136, 0.424537206666667,
0.723243112666667, 0.335509156333333, 0.328275209, 0.788329993666667,
1.125292329, 2.357924224, 0.678921448, 0.222768019, 0.293117217,
0.548228048, 0.841192647333333, 3.144197864, 0.576764958333333,
1.32037215366667, 1.15039119233333, 1.03539976366667, 1.00032109266667,
0.740699933666667, 0.687992671666667), SE = c(0.119785209010494,
0.168580466330281, 0.264739468221289, 0.124588107424543,
0.194995686650518, 0.0392007703821249, 0.06203362889702,
0.0482287534807508, 0.396968455138007, 0.0903480171168777,
0.717823561374135, 0.164024037188693, 0.0078580995264886,
0.0980939303386436, 0.233081861930954, 0.0870744069976396,
0.324195222544884, 0.434640930315622, 0.0658409437053185,
0.135850334794207, 0.175517934316736, 0.123213160632528,
0.133598346586129, 0.203707785326976)), .Names = c("Gene",
"Genotype", "Time", "Ave", "SE"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -24L))
How can I modify this syntax to generate each graph separately and save them as JPG/PNG files?
# Dodged bars of the mean (Ave) per time point, filled by genotype, with
# error bars spanning Ave - SE to Ave + SE, in one panel per gene.
ggplot(longdata, aes(x = Time, y = Ave, fill = Genotype)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    aes(ymin = Ave - SE, ymax = Ave + SE),
    width = 0.1,
    position = position_dodge(0.9)
  ) +
  facet_wrap(~Gene)
You can put ggplot and ggsave within a loop.
# Write one PNG per gene: for every distinct value of Gene (in sorted order),
# build the dodged bar chart with error bars from that gene's rows and save
# it as "<gene>.png".
lapply(sort(unique(longdata$Gene)), function(i) {
  gene_plot <- ggplot(
    longdata[longdata$Gene == i, ],
    aes(x = Time, y = Ave, fill = Genotype)
  ) +
    geom_bar(position = position_dodge(), stat = "identity") +
    geom_errorbar(
      aes(ymin = Ave - SE, ymax = Ave + SE),
      width = 0.1,
      position = position_dodge(0.9)
    )
  # Pass the plot explicitly rather than relying on ggsave()'s default of
  # last_plot(), so the file always holds the plot built in this iteration.
  ggsave(filename = paste0(i, ".png"), plot = gene_plot)
})
This loop gets the unique elements of Gene, sorts them, creates a plot for each one, and then saves the result.

TukeyHSD specific conditions

I'm trying to get a TukeyHSD to run in R; my code looks like this:
#----------------------------------------------------------------------------------------#
# RING data:
#----------------------------------------------------------------------------------------#
library(doBy)
# Working directory
# The original `setwd("")` call is dropped: an empty path makes setwd() fail,
# and a script should not change the session's working directory. Run the
# script from the directory that contains "afstand.txt".

#### Read data & convert factors ####
dat <- read.table("afstand.txt", header = TRUE)
str(dat)
dat$Vial <- as.factor(dat$Vial)
dat$Line <- as.factor(dat$Line)
dat$Fly <- as.factor(dat$Fly)
dat$Temp <- as.factor(dat$Temp)
str(dat)

# Sum the six time-window columns within each combination of the grouping
# variables on the right-hand side of the formula.
datSUM <- summaryBy(
  X0.5_sec + X1_sec + X1.5_sec + X2_sec + X2.5_sec + X3_sec ~
    Vial_nr + Concentration + Sex + Line + Vial + Temp,
  data = dat, FUN = sum
)
fl <- levels(datSUM$Line)

# Four-way factorial ANOVA on the summed 0.5 s measurement.
aov1 <- aov(X0.5_sec.sum ~ Concentration * Sex * Line * Temp, data = datSUM)
summary(aov1) # Overview of model

# Restrict the Tukey comparisons to a single model term via `which`.
TukeyHSD(aov1, which = "Line", ordered = TRUE, conf.level = 0.95)
What I would like to do is look at interactions between Line and Temp, for instance, but if I run TukeyHSD(aov1) then I get ALL the interactions, and the output is truncated with this message: [ reached getOption("max.print") -- omitted 3716 rows ]. Is there a way to specify that I want to test only between Line and Temp and not all combinations, or a way of showing only the significant results if I just run TukeyHSD(aov1)?
I have tried using TukeyHSD(aov1, 'Line,Temp',ordered = TRUE, conf.level = 0.95) , TukeyHSD(aov1, 'Line':'Temp',ordered = TRUE, conf.level = 0.95) and TukeyHSD(aov1, 'Line'&'Temp',ordered = TRUE, conf.level = 0.95) but with no luck.
structure(list(Concentration = structure(c(2L, 7L, 7L, 1L, 7L,
1L, 2L, 1L, 7L, 1L, 4L, 2L, 2L, 1L, 2L, 4L, 7L, 2L, 2L, 1L), .Label = c("a",
"b", "c", "d", "e", "x", "y"), class = "factor"), Sex = structure(c(1L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L), .Label = c("f", "m"), class = "factor"), Line = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L,
3L, 3L, 3L), .Label = c("20", "23", "40", "73"), class = "factor"),
Temp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L), .Label = c("23",
"29"), class = "factor"), X0.5_sec.sum = c(107.19, 46.17,
58.05, 75.87, 87.75, 71.55, 46.17, 47.25, 22.41, 31.05, 27.36,
79.11, 87.58, 21.33, 34.29, 60.4, 85.05, 72.47, 114.21, 67.77
)), .Names = c("Concentration", "Sex", "Line", "Temp", "X0.5_sec.sum"
), row.names = c(NA, 20L), class = "data.frame")
To show only the interactions between the variables Line and Temp, you can specify the argument `which` as follows:
which = 'Line:Temp'
which then turns your complete function call to TukeyHSD into:
# Tukey comparisons for the Line-by-Temp interaction term only.
TukeyHSD(aov1, which = "Line:Temp", ordered = TRUE, conf.level = 0.95)

Plot histogram in ggplot

I have this dataframe.
dput(EF_Lat_Am)
structure(list(V1 = structure(c(4L, 3L, 5L, 6L, 1L, 2L, 7L, 8L,
4L, 3L, 5L, 6L, 1L, 2L, 7L, 8L), .Label = c("Crop Agriculture",
"Mining", "Mixed Agriculture", "Other land use", "Pasture", "Tree crops",
"Urban", "Water"), class = "factor"), V2 = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Emission Factor", class = "factor"),
V3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("2000", "2005"), class = "factor"),
V4 = c(77.0109486116396, 69.2454348145657, 73.684103657833,
71.0430911289891, 43.136201172115, 117.358146800995, 77.4653952935238,
89.0966064142874, 71.8286578413912, 67.9099357961953, 76.7438444998728,
67.4818461466729, 50.6468079101972, 117.799797611894, 78.7347377710757,
81.3020943196897)), .Names = c("V1", "V2", "V3", "V4"), row.names = c(NA,
16L), class = "data.frame")
As you can see, for the years 2000 and 2005 I have an emission factor value for each type of land use. I want to plot a bar chart with the type of land use on the x axis and the emission factors on the y axis. In addition, for each land use I want the bars for the two years to be adjacent. I also want a legend showing which year (2000 or 2005) each bar corresponds to. Thanks for your help.
Here is the answer.
# Side-by-side bars of the emission factor (V4) per land-use type (V1), one
# bar per period (V3), in grey shades with a legend titled "Period".
ggplot(EF_Lat_Am, aes(x = V1, y = V4, fill = V3, width = .85)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(x = "", y = "EF (T/ha)") +
  scale_fill_grey("Period") +
  scale_y_continuous(limits = c(0, 120)) +
  # The complete theme must come first: adding theme_classic() after theme()
  # replaces the whole theme and discards the customisations below.
  theme_classic(base_size = 20, base_family = "") +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 20),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20),
    axis.line = element_line(colour = "black")
  )

Resources