Having trouble rearranging multiple GGPLOT2 graphs and moving/resizing scale - r
I am working with a dataset consisting of different plant genotypes, rates of fertilizer application, and 5 different measurements. I am using ggplot2 to produce multiple bar graphs, and then using the gridExtra package to combine them onto a single page. The trouble I am having involves moving and resizing the scale (the legend): I would like a single legend shared by all of my graphs instead of one per graph, and I would like to place it in the lower right corner of the combined figure. The data/code below should better explain what I mean.
Packages/Dataset
#Open packages
library(dplyr)
library(ggplot2)
library(gridExtra)
#Dataset
# Example data set: 36 observations (rows) with a genotype code (1-3),
# a fertilizer rate code (1-4) and five numeric measurements per observation.
# Values are listed in row order, twelve per line.
plantdata <- data.frame(
  genotype = c(
    1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
    2, 2, 1, 3, 3, 3, 1, 3, 2, 2, 1, 1,
    1, 2, 2, 1, 3, 3, 3, 3, 2, 1, 2, 1
  ),
  rate = c(
    1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
    2, 4, 1, 1, 3, 2, 3, 4, 1, 3, 4, 2,
    2, 1, 3, 3, 4, 1, 3, 2, 4, 1, 2, 4
  ),
  measure1 = c(
    958, 309, 750, 43, 20, 868, 905, 674, 64, 151, 677, 144,
    803, 485, 707, 881, 684, 222, 399, 507, 4, 690, 831, 574,
    104, 238, 378, 897, 63, 154, 582, 641, 750, 855, 194, 55
  ),
  measure2 = c(
    359, 728, 180, 614, 241, 989, 117, 101, 95, 156, 227, 355,
    597, 50, 636, 912, 149, 862, 897, 601, 176, 7, 182, 214,
    453, 569, 124, 113, 969, 781, 713, 613, 800, 334, 435, 748
  ),
  measure3 = c(
    639, 304, 891, 317, 869, 901, 723, 267, 837, 923, 171, 991,
    107, 309, 733, 705, 819, 807, 346, 447, 891, 957, 359, 323,
    846, 944, 400, 548, 327, 48, 677, 624, 564, 854, 658, 343
  ),
  measure4 = c(
    805, 24, 624, 675, 261, 437, 601, 129, 733, 172, 746, 586,
    142, 243, 103, 779, 612, 870, 84, 881, 850, 456, 255, 52,
    228, 492, 556, 66, 670, 682, 736, 178, 568, 501, 229, 500
  ),
  measure5 = c(
    667, 105, 565, 724, 238, 861, 299, 13, 171, 759, 755, 557,
    739, 228, 870, 595, 793, 790, 572, 590, 365, 974, 550, 766,
    441, 265, 245, 909, 150, 88, 473, 245, 340, 378, 998, 121
  )
)
Function for standard error of the mean:
# Standard error of the mean: sd(x) / sqrt(n).
#
# x      numeric vector of observations.
# na.rm  if TRUE, missing values are dropped before both the standard
#        deviation and the sample size are computed. The default (FALSE)
#        keeps the previous behaviour: any NA in x yields NA.
#
# Returns a single numeric value (NA for fewer than two usable observations,
# because sd() is undefined there).
sem <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that length(x) counts only real observations.
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
Generating the graphs:
#Measurement 1 graph
# Keep only the grouping columns and the measurement plotted in this panel.
meas1 <- select(plantdata, genotype, rate, measure1)
#Aggregating data frame
# aggregate() names the grouping columns Group.1 (genotype) and Group.2 (rate)
# and returns one row per genotype x rate combination.
meas1_mean <- aggregate(meas1, by = list(meas1$genotype, meas1$rate), FUN = mean)
meas1_sem <- aggregate(meas1, by = list(meas1$genotype, meas1$rate), FUN = sem)
# Both aggregates use the same grouping, so their rows line up one-to-one.
# Store the SEM next to the mean so aes() can use plain column names instead
# of `$` references into a second data frame.
meas1_mean$measure1_sem <- meas1_sem$measure1
g1 <- ggplot(meas1_mean, aes(x = Group.1, y = measure1, fill = factor(Group.2))) +
  geom_bar(stat = "identity", width = 0.6, position = "dodge", colour = "black") +
  # Error bars start at the top of each bar (the mean) and extend one SEM upwards.
  geom_errorbar(aes(ymin = measure1, ymax = measure1 + measure1_sem),
                width = 0.2, position = position_dodge(0.6)) +
  # Only one fill scale: a preceding scale_fill_discrete() would simply be
  # replaced by this one (with a message), so it is omitted.
  scale_fill_brewer(palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")) +
  xlab("Genotype") + ylab("Measurement") +
  ggtitle("Plant Measurement 1") +
  theme(plot.title = element_text(hjust = 0.5))
####################################################################
#Measurement 2 graph
# Keep only the grouping columns and the measurement plotted in this panel.
meas2 <- select(plantdata, genotype, rate, measure2)
#Aggregating dataframe
# aggregate() names the grouping columns Group.1 (genotype) and Group.2 (rate)
# and returns one row per genotype x rate combination.
meas2_mean <- aggregate(meas2, by = list(meas2$genotype, meas2$rate), FUN = mean)
meas2_sem <- aggregate(meas2, by = list(meas2$genotype, meas2$rate), FUN = sem)
# Both aggregates use the same grouping, so their rows line up one-to-one.
# Store the SEM next to the mean so aes() can use plain column names instead
# of `$` references into a second data frame.
meas2_mean$measure2_sem <- meas2_sem$measure2
#Generating graph
g2 <- ggplot(meas2_mean, aes(x = Group.1, y = measure2, fill = factor(Group.2))) +
  geom_bar(stat = "identity", width = 0.6, position = "dodge", colour = "black") +
  # Error bars start at the top of each bar (the mean) and extend one SEM upwards.
  geom_errorbar(aes(ymin = measure2, ymax = measure2 + measure2_sem),
                width = 0.2, position = position_dodge(0.6)) +
  # Only one fill scale: a preceding scale_fill_discrete() would simply be
  # replaced by this one (with a message), so it is omitted.
  scale_fill_brewer(palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")) +
  xlab("Genotype") + ylab("Measurement") +
  ggtitle("Plant Measurement 2") +
  theme(plot.title = element_text(hjust = 0.5))
####################################################################
#Measurement 3 graph
# Keep only the grouping columns and the measurement plotted in this panel.
meas3 <- select(plantdata, genotype, rate, measure3)
#Aggregating dataframe
# aggregate() names the grouping columns Group.1 (genotype) and Group.2 (rate)
# and returns one row per genotype x rate combination.
meas3_mean <- aggregate(meas3, by = list(meas3$genotype, meas3$rate), FUN = mean)
meas3_sem <- aggregate(meas3, by = list(meas3$genotype, meas3$rate), FUN = sem)
# Both aggregates use the same grouping, so their rows line up one-to-one.
# Store the SEM next to the mean so aes() can use plain column names instead
# of `$` references into a second data frame.
meas3_mean$measure3_sem <- meas3_sem$measure3
#Graph
g3 <- ggplot(meas3_mean, aes(x = Group.1, y = measure3, fill = factor(Group.2))) +
  geom_bar(stat = "identity", width = 0.6, position = "dodge", colour = "black") +
  # Error bars start at the top of each bar (the mean) and extend one SEM upwards.
  geom_errorbar(aes(ymin = measure3, ymax = measure3 + measure3_sem),
                width = 0.2, position = position_dodge(0.6)) +
  # Only one fill scale: a preceding scale_fill_discrete() would simply be
  # replaced by this one (with a message), so it is omitted.
  scale_fill_brewer(palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")) +
  xlab("Genotype") + ylab("Measurement") +
  ggtitle("Plant Measurement 3") +
  theme(plot.title = element_text(hjust = 0.5))
##############################################################
#Measurement 4 graph
# Keep only the grouping columns and the measurement plotted in this panel.
meas4 <- select(plantdata, genotype, rate, measure4)
#Aggregating dataframe
# aggregate() names the grouping columns Group.1 (genotype) and Group.2 (rate)
# and returns one row per genotype x rate combination.
meas4_mean <- aggregate(meas4, by = list(meas4$genotype, meas4$rate), FUN = mean)
meas4_sem <- aggregate(meas4, by = list(meas4$genotype, meas4$rate), FUN = sem)
# Both aggregates use the same grouping, so their rows line up one-to-one.
# Store the SEM next to the mean so aes() can use plain column names instead
# of `$` references into a second data frame.
meas4_mean$measure4_sem <- meas4_sem$measure4
#Graph
g4 <- ggplot(meas4_mean, aes(x = Group.1, y = measure4, fill = factor(Group.2))) +
  geom_bar(stat = "identity", width = 0.6, position = "dodge", colour = "black") +
  # Error bars start at the top of each bar (the mean) and extend one SEM upwards.
  geom_errorbar(aes(ymin = measure4, ymax = measure4 + measure4_sem),
                width = 0.2, position = position_dodge(0.6)) +
  # Only one fill scale: a preceding scale_fill_discrete() would simply be
  # replaced by this one (with a message), so it is omitted.
  scale_fill_brewer(palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")) +
  xlab("Genotype") + ylab("Measurement") +
  ggtitle("Plant Measurement 4") +
  theme(plot.title = element_text(hjust = 0.5))
################################################################
#Measurement 5 graph
# Keep only the grouping columns and the measurement plotted in this panel.
meas5 <- select(plantdata, genotype, rate, measure5)
#Aggregate dataframe
# aggregate() names the grouping columns Group.1 (genotype) and Group.2 (rate)
# and returns one row per genotype x rate combination.
meas5_mean <- aggregate(meas5, by = list(meas5$genotype, meas5$rate), FUN = mean)
meas5_sem <- aggregate(meas5, by = list(meas5$genotype, meas5$rate), FUN = sem)
# Both aggregates use the same grouping, so their rows line up one-to-one.
# Store the SEM next to the mean so aes() can use plain column names instead
# of `$` references into a second data frame.
meas5_mean$measure5_sem <- meas5_sem$measure5
#Graph
g5 <- ggplot(meas5_mean, aes(x = Group.1, y = measure5, fill = factor(Group.2))) +
  geom_bar(stat = "identity", width = 0.6, position = "dodge", colour = "black") +
  # Error bars start at the top of each bar (the mean) and extend one SEM upwards.
  geom_errorbar(aes(ymin = measure5, ymax = measure5 + measure5_sem),
                width = 0.2, position = position_dodge(0.6)) +
  # Only one fill scale: a preceding scale_fill_discrete() would simply be
  # replaced by this one (with a message), so it is omitted.
  scale_fill_brewer(palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")) +
  xlab("Genotype") + ylab("Measurement") +
  ggtitle("Plant Measurement 5") +
  theme(plot.title = element_text(hjust = 0.5))
Then, I arranged all the graphs onto one page, as follows:
# Lay the five plots out on a 2-row x 3-column grid.
grid.arrange(grobs = list(g1, g2, g3, g4, g5), nrow = 2, ncol = 3)
The output:
Because this figure has to be exported in a smaller format, I would like to have a single scale (legend) for the whole combined image instead of one per graph, and I would like to increase its size. See below:
How can I do this? If there is a better way to generate this than the way I did it, I am open to learning how to do it.
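Consider the ggpubr package and its ggarrange() function with the argument common.legend = TRUE, which draws one shared legend for all of the arranged plots. The legend argument of ggarrange() (e.g. legend = "right" or legend = "bottom") controls where that shared legend is placed.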
library(ggplot2)
library(ggpubr)

# Three example data sets: 100 random (x, y) points each, labelled with a
# five-level grouping factor (20 consecutive points per level).
a <- data.frame(
  x = rnorm(100),
  y = rnorm(100),
  group = gl(5, 20)
)
b <- data.frame(
  x = rnorm(100),
  y = rnorm(100),
  group = gl(5, 20)
)
c <- data.frame(
  x = rnorm(100),
  y = rnorm(100),
  group = gl(5, 20)
)

# Scatter plot of y against x with points coloured by group.
scatter_by_group <- function(points) {
  ggplot(points, aes(x = x, y = y, color = group)) + geom_point()
}

p1 <- scatter_by_group(a)
p2 <- scatter_by_group(b)
p3 <- scatter_by_group(c)

# Arrange the three plots with a single legend shared between them.
ggarrange(p1, p2, p3, common.legend = TRUE)
Documentation: https://rpkgs.datanovia.com/ggpubr/reference/ggarrange.html
Related
Adding Consecutive Arrows to geom_point() in ggplot2
I want to add a series of arrows connecting each observation in geom_point as in the graph: I understand that geom_segment is meant to be used, but I am having issues, and have not found something quite like this after quite a bit of searching. This is sample code that should satisfy the pattern: Note: The labels are not important ; just the arrows df <- data.frame(year = c(1935:1968), y_axis_values = c( 2755,2696, 2646, 2701, 2654, 2766, 2832, 2964, 3041, 3010, 3018, 3374, 3545, 3441, 3456, 3455, 3503, 3641, 3721, 3828, 3831, 3858, 3925, 3880, 3935, 3895, 3840, 3756, 3669, 3502, 3145, 2812, 2586,2441), x_axis_values = c(238, 240, 241, 242, 244, 245, 246, 268, 333, 335, 331, 253, 243, 241, 242, 237, 242, 240, 233, 232, 236, 245, 256, 261, 265, 278, 291, 290, 290, 307, 313, 325, 339, 338) I have tried the general formula with many different argument variations, but cannot seem to find it. ggplot(df, aes(x = x_axis_values, y = y_axis_values) + geom_point() + geom_segment()
You need the xend and yend values for each segment. Since your data frame is in order, the xend and yend value for each segment is just the next row's x and y values. You can get these by using dplyr::lead on the x and y aesthetics. library(ggplot2) library(dplyr) ggplot(df, aes(x = x_axis_values, y = y_axis_values)) + geom_point(color = "#69b3a2") + geom_segment(aes(xend = after_stat(lead(x)), yend = after_stat(lead(y))), arrow = arrow(length = unit(3, "mm")), color = "#69b3a2") + geom_text(aes(label = year), size = 5, fontface = 2, data = . %>% filter(year %in% c(1935, 1937, 1939, 1942, 1945, 1946, 1953, 1957, 1960, 1961)), nudge_x = c(-3, -2, 4, 0, 0, -2, -5, 0, 3, 5), nudge_y = c(30, -30, 10, -30, -40, -40, 0, -50, 30, 0)) + labs(x = "partic", y = "tfr") + theme_bw(base_size = 16)
Create bar plot in ggplot2 - Place data frame values instead of count
I'd like to place this data onto a bar plot using ggplot2 where the column "Clades" would be placed on the X axis and the values from each column (such as the values of 19A, for example) would be place on Y axis I'm trying something like this: cols = as.vector(names(snv_data)[2:19]) ggplot(df, aes(x=cols)) + geom_bar() But I keep getting this: I'm new to ggplot2 so any help is very welcome! I'm doing this to try and get 7 plots (one for each column such as 19A, 20A, 20B, etc) where each plot would have the Clades on the X-axis and each value from each column as the "counts" on the Y-axis dput: structure(list(Clades = c("C.T", "A.G", "G.A", "G.C", "T.C", "C.A", "G.T", "A.T", "T.A", "T.G", "A.C", "C.G", "A.del", "TAT.del", "TCTGGTTTT.del", "TACATG.del", "AGTTCA.del", "GATTTC.del"), `19A` = c(413, 93, 21, 0, 49, 9, 238, 13, 3, 1, 0, 4, 1, 0, 0, 0, 0, 0), `20A` = c(7929, 1920, 1100, 419, 1025, 124, 3730, 124, 22, 45, 64, 17, 8, 19, 23, 39, 0, 0), `20B` = c(5283, 1447, 2325, 1106, 336, 117, 946, 137, 35, 53, 123, 11, 9, 10, 21, 1, 0, 0), `20E (EU1)` = c(13086, 1927, 650, 1337, 1864, 96, 2967, 243, 69, 92, 115, 1486, 27, 5, 0, 1, 0, 0), `20I (Alpha, V1)` = c(71142, 12966, 12047, 15587, 14935, 15382, 11270, 12211, 5284, 4273, 430, 99, 5674, 4536, 4974, 4592, 0, 0), `20J (Gamma, V3)` = c(2822, 654, 883, 409, 501, 213, 843, 399, 203, 27, 429, 198, 1, 0, 197, 0, 0, 0), `21J (Delta)` = c(166003, 49195, 26713, 1399, 25824, 15644, 95967, 2011, 329, 11034, 716, 21087, 10532, 198, 0, 14, 9809, 10503)), class = "data.frame", row.names = c("C.T", "A.G", "G.A", "G.C", "T.C", "C.A", "G.T", "A.T", "T.A", "T.G", "A.C", "C.G", "A.del", "TAT.del", "TCTGGTTTT.del", "TACATG.del", "AGTTCA.del", "GATTTC.del"))
To add to the previous answer, here is how you can get 7 plots (1 for each Clade, which is how I interpreted the question) using facet_wrap(): df <- df %>% pivot_longer(-Clades) ggplot(data = df, aes(x = Clades, y = value)) + geom_bar(aes(fill = Clades), stat = 'identity') + facet_wrap(~name, scales = 'free_y') + theme(axis.text.x = element_blank())
As cazman said in the comments, you need to get your data in long form for it to work with ggplot2 (efficiently). First, use pivot_longer(), and then use ggplot2: library(tidyverse) dat %>% pivot_longer(-Clades) %>% ggplot(aes(x=Clades, y=value, fill=name)) + geom_col()
Repositioning and increasing the weight of borders around dendrogram produced with R 'plot' function
I am trying to cut a dendrogram into three classes using the rect.hclust function, but when I export the graph, it cuts off the borders at the bottom of the graph. In addition, I would like to increase the weight of the borders, but I am not sure how to do this, as the lwd argument doesn't seem to exist for this function. What can I do to fix these parameters? Data: cluster <- data.frame(plot=c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), meas1 = c(443, 836, 903, 684, 94, 125, 733, 846, 625, 234, 437, 775, 269, 774, 17, 502, 80, 51, 523, 229), meas2 = c(735, 574, 793, 261, 961, 136, 404, 138, 45, 935, 698, 675, 594, 497, 152, 153, 30, 667, 547, 745), meas3 = c(23, 526, 36, 93, 708, 970, 399, 111, 456, 439, 569, 503, 337, 213, 399, 850, 614, 491, 28, 452)) Code for hierarchical clustering and dendrogram generation: #Generate distance matrix dist_mat <- dist(cluster, method = 'euclidean') #Hierarchical clustering hclust_avg <- hclust(dist_mat, method = 'average') #Cut into 3 classes cut_avg <- cutree(hclust_avg, k = 3) #Plot dendrogram plot(hclust_avg, xlab = "", ylab = "Euclidean Distance", sub = "", main = "Cluster Dendrogram", lwd = 2) #Generate borders around each group rect.hclust(hclust_avg, k=3, border = 2:6) Exporting the graph looks as follows: I need the red-green-blue borders to be thicker (higher border weights), and I also need them to not be cut off at the bottom of the graph.
I bypassed the problem of the missing lower edge of the rectangle by setting the figure margins to zero before drawing the rectangle. The line width of the rectangles can be set by setting par(lwd), e.g. par(lwd=4), as in the example below: cluster <- data.frame(plot=1:20, meas1 = c(443, 836, 903, 684, 94, 125, 733, 846, 625, 234, 437, 775, 269, 774, 17, 502, 80, 51, 523, 229), meas2 = c(735, 574, 793, 261, 961, 136, 404, 138, 45, 935, 698, 675, 594, 497, 152, 153, 30, 667, 547, 745), meas3 = c(23, 526, 36, 93, 708, 970, 399, 111, 456, 439, 569, 503, 337, 213, 399, 850, 614, 491, 28, 452)) #Generate distance matrix dist_mat <- dist(cluster, method = 'euclidean') #Hierarchical clustering hclust_avg <- hclust(dist_mat, method = 'average') #Cut into 3 classes cut_avg <- cutree(hclust_avg, k = 3) pars <- par() #Plot dendrogram plot(hclust_avg, xlab = "", ylab = "Euclidean Distance", sub = "", main = "Cluster Dendrogram", lwd = 2) par(lwd=4, mar=c(0,0,0,0)) #Generate borders around each group rect.hclust(hclust_avg, k=3, border = 2:6) # reset par par(lwd=pars$lwd, mar=pars$mar) Created on 2020-06-30 by the reprex package (v0.3.0)
ggplot visualization questions
To visualize my data, I used qplot. Question: why doesn't "colour" change, and is it possible to use type = "h" as in a base plot? print(qplot(roundpop, Observation, data=roundpopus), shape = 5, colour = "blue") # I tried with "" and without. And if it is possible to change the type to vertical lines, as in the second picture, can I draw a line along the tops of those lines? Like that: and maybe write labels (states) on top of the lines, because I only know how to label points in a base plot. Thank you!
Here are some options, which you may want to tweak according to your needs: library(ggplot2) df <- structure(list(x = c(1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 9, 10, 10, 10, 12, 13, 13, 20, 20, 27, 39 ), y = c(33, 124, 45, 294, 160, 105, 276, 178, 377, 506, 176, 393, 247, 378, 221, 796, 503, 162, 801, 486, 268, 575, 828, 493, 252, 495, 836, 551, 413, 832, 1841, 1927), lab = c("i8g8Q", "oXlWk", "NC2WO", "pYxBL", "Xfsy6", "FJcOl", "Ke98f", "K2mCW", "g4XYi", "ICzWp", "7nqrK", "dzhlC", "JagAW", "0bObp", "8ljIW", "E8OZR", "6Tuxz", "3Grbq", "xqsld", "BvuJT", "JXi2N", "eSDYS", "OYVWN", "vyWzK", "6AKxk", "nCgPx", "8lHrq", "kWAGm", "E08Rd", "cmIYY", "btoUm", "k6Iek")), .Names = c("x", "y", "lab"), row.names = c(NA, -32L), class = "data.frame") p <- ggplot(df, aes(x, y)) gridExtra::grid.arrange( p + geom_point(), p + geom_point() + geom_text(aes(label = lab), angle = 60, hjust = 0, size = 2), p + geom_segment(aes(xend=x, yend=0)), p + geom_segment(aes(xend=x, yend=0)) + geom_line(color = "red", size = 2) , p + geom_segment(aes(xend=x, yend=0)) + geom_smooth(span = .4, se = FALSE, color = "red", size = 2) )
R plot, control axis height or limit
I'm drawing two stacked plots using par(mfrow = c(2,1)). On the lower plot I removed the default axis and added my own (this is a dummy example of a much more complex plot where I need to do this). The problem is that the axis I added goes up to the upper limit of the lower plot box, overlapping the numbers of the upper plot axis. I therefore need to limit the height of the lower plot axis so that it does not overlap. Here is some dummy code that should reproduce the effect: serie1 <- c(45, 257, 25, 55, 89, 297, 471, 1256, 312, 969, 788, 425, NaN, 77, 43, 38, 20, 6, 16, 13, 11, 6, 7, 2, 0, 31, 4, 3, 2, 2, 3, 4, 10, 16, 200, NaN) serie2 <- c(1106, 1654, 578, 1354, 536, 2384, 586, 1356, 1457, 1508, 4567, 4501, 4037, 7735, 6118, 2775, 1196, 916, 551, 991, 109, 174, 278, 100, 98, 124, 122, 138, 256, 226, 445, 638, 511, 465, 1733, 1278) opar <- par(mfrow = c(2,1)) par(mar = c(0,4,4,2)) plot.ts(serie1, col = 'red', lwd = 3, xaxt= 'n', xlab='') par(mar = c(5,4,0,2)) plot.ts(serie2, col = 'blue', lwd = 3, xaxt = 'n', yaxt = 'n', ylab = '', xlab = '') axis(2) mtext(side = 2, line = 3, 'serie2') par(opar)
You can fine-tune the axis labels by using the at (and the labels) parameter to axis(). Replace your line axis(2) by this: axis(2,at=(0:3)*2000) Look at ?axis to see your options. Alternatively, you could leave some room between your plots, i.e., change your par(mar=...) command.