ggplot visualization questions - r

To visualize my data, I used qplot() from ggplot2.
Question: Why doesn't "colour" change, and is it possible to use type = "h" as in a base plot?
# NOTE(review): shape and colour are arguments of print() here, not of qplot() - the qplot() call is closed before them.
print(qplot(roundpop, Observation, data=roundpopus), shape = 5, colour = "blue") # I tried with "" and without.
And if it's possible to change type to histogram, like on second picture, can I draw a line by the top of lines?
Like that:
and maybe to write labels (states) on the top of the lines. Because I know how to give a name only for dots on basic plot.
Thank you!

Here are some options, which you may want to tweak according to your needs:
library(ggplot2)
df <- structure(list(x = c(1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6,
6, 6, 7, 7, 7, 7, 8, 9, 10, 10, 10, 12, 13, 13, 20, 20, 27, 39
), y = c(33, 124, 45, 294, 160, 105, 276, 178, 377, 506, 176,
393, 247, 378, 221, 796, 503, 162, 801, 486, 268, 575, 828, 493,
252, 495, 836, 551, 413, 832, 1841, 1927), lab = c("i8g8Q", "oXlWk",
"NC2WO", "pYxBL", "Xfsy6", "FJcOl", "Ke98f", "K2mCW", "g4XYi",
"ICzWp", "7nqrK", "dzhlC", "JagAW", "0bObp", "8ljIW", "E8OZR",
"6Tuxz", "3Grbq", "xqsld", "BvuJT", "JXi2N", "eSDYS", "OYVWN",
"vyWzK", "6AKxk", "nCgPx", "8lHrq", "kWAGm", "E08Rd", "cmIYY",
"btoUm", "k6Iek")), .Names = c("x", "y", "lab"), row.names = c(NA,
-32L), class = "data.frame")
# Base plot shared by all five panels: x against y from `df`.
p <- ggplot(df, aes(x, y))

# Vertical segments from each point down to y = 0 (the ggplot counterpart of
# base graphics' type = "h"); reused by the last three panels.
spikes <- geom_segment(aes(xend = x, yend = 0))

gridExtra::grid.arrange(
  # 1. plain scatter plot
  p + geom_point(),
  # 2. scatter plot with a rotated text label next to every point
  p + geom_point() +
    geom_text(aes(label = lab), angle = 60, hjust = 0, size = 2),
  # 3. spikes only
  p + spikes,
  # 4. spikes plus a red line through the observations
  p + spikes + geom_line(color = "red", size = 2),
  # 5. spikes plus a red smoothed curve, without confidence band
  p + spikes +
    geom_smooth(span = .4, se = FALSE, color = "red", size = 2)
)

Related

From Boxplot to Barplot in ggplot possible?

I have to make a ggplot barplot with error bars and Tukey significance letters for plants grown with different fertilizer concentrations.
The data should be grouped after the dif. concentrations and the sig. letters should be added automaticaly.
I have already a code for the same problem but for Boxplot - which is working nicely. I tried several tutorials with barplots but I always get the problem; stat_count() can only have an x or y aesthetic.
So I thought, is it possible to get my boxplot code to a barplot code? I tried but I couldnt do it :) And if not - how do I automatically add tukeyHSD Test result sig. letters to a ggplot barplot?
This is my Code for the boxplot with the tukey letters:
    value_max = Dünger, group_by(Duenger.g), summarize(max_value = max(Höhe.cm))
hsd=HSD.test(aov(Höhe.cm~Duenger.g, data=Dünger),
trt = "Duenger.g", group = T) sig.letters <- hsd$groups[order(row.names(hsd$groups)), ]
J <- ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm))+ geom_boxplot(aes(fill= Duenger.g))+ scale_fill_discrete(labels=c("0.5g", '1g', "2g", "3g", "4g"))+ geom_text(data = value_max, aes(x=Duenger.g, y = 0.1 + max_value, label = sig.letters$groups), vjust=0)+ stat_boxplot(geom = 'errorbar', width = 0.1)+ ggtitle("Auswirkung von Dünger auf die Höhe von Pflanzen") + xlab("Dünger in g") + ylab("Höhe in cm"); J
This is how it looks:
boxplot with tukey
Data from dput:
structure(list(Duenger.g = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4), plant = c(1, 2, 3, 4, 5, 7, 10, 11, 12, 13, 14, 18, 19,
21, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40,
41, 42, 43, 44, 48, 49, 50, 53, 54, 55, 56, 57, 58, 61, 62, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 83, 85, 86,
88, 89, 91, 93, 99, 100, 102, 103, 104, 105, 106, 107, 108, 110,
111, 112, 113, 114, 115, 116, 117, 118, 120, 122, 123, 125, 126,
127, 128, 130, 131, 132, 134, 136, 138, 139, 140, 141, 143, 144,
145, 146, 147, 149), height.cm = c(5.7, 2.8, 5.5, 8, 3.5, 2.5,
4, 6, 10, 4.5, 7, 8.3, 11, 7, 8, 2.5, 7.4, 3, 14.5, 7, 12, 7.5,
30.5, 27, 6.5, 19, 10.4, 12.7, 27.3, 11, 11, 10.5, 10.5, 13,
53, 12.5, 12, 6, 12, 35, 8, 16, 56, 63, 69, 62, 98, 65, 77, 32,
85, 75, 33.7, 75, 55, 38.8, 39, 46, 35, 59, 44, 31.5, 49, 34,
52, 37, 43, 38, 28, 14, 28, 19, 20, 23, 17.5, 32, 16, 17, 24.7,
34, 50, 12, 14, 21, 33, 39.3, 41, 29, 35, 48, 40, 65, 35, 10,
26, 34, 41, 32, 38, 23.5, 22.2, 20.5, 29, 34, 45)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -105L))
Thank you
mirai
A bar chart and a boxplot are two different things. By default, geom_boxplot computes the boxplot stats (stat="boxplot"). In contrast, when you use geom_bar it will by default count the number of observations (stat="count"), which are then mapped on y. That's the reason why you get an error. Hence, simply replacing geom_boxplot by geom_bar will not give you your desired result. Instead you could use e.g. stat_summary to create your bar chart with error bars. Additionally, I created a summary dataset to add the labels on top of the error bars.
library(ggplot2)
library(dplyr)
library(agricolae)
# Prepare the data: use the column name the plotting code expects and turn the
# fertilizer dose into a factor so it is treated as a discrete group.
Dünger <- Dünger |>
  rename("Höhe.cm" = height.cm) |>
  mutate(Duenger.g = factor(Duenger.g))

# Tukey HSD grouping letters; the treatment levels are stored as row names of
# hsd$groups, so copy them into a regular column to join on.
hsd <- HSD.test(
  aov(Höhe.cm ~ Duenger.g, data = Dünger),
  trt = "Duenger.g", group = TRUE
)
sig.letters <- hsd$groups %>% mutate(Duenger.g = row.names(.))

# Per-dose summary (mean_se() yields y, ymin, ymax) plus the Tukey letters;
# used to place the letters on top of the error bars.
duenger_sum <- Dünger |>
  group_by(Duenger.g) |>
  summarize(mean_se(Höhe.cm)) |>
  left_join(sig.letters, by = "Duenger.g")

ggplot(Dünger, aes(x = Duenger.g, y = Höhe.cm, fill = Duenger.g)) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(geom = "errorbar", width = .1) +
  # Derive the legend labels from the factor levels instead of hard-coding
  # them: the data contain the doses 0, 0.5, 1, 2 and 4, so a fixed vector
  # "0.5g", "1g", "2g", "3g", "4g" would label every dose wrongly.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(
    data = duenger_sum,
    aes(y = ymax, label = groups),
    vjust = 0, nudge_y = 1
  ) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )
#> No summary function supplied, defaulting to `mean_se()`
But as the summary dataset now already contains the mean and the values for the error bars a second option would be to do:
# Same chart drawn directly from the summary table: `y` is the mean and
# `ymin` / `ymax` are the error-bar bounds computed by mean_se().
ggplot(duenger_sum, aes(x = Duenger.g, y = y, fill = Duenger.g)) +
  geom_col() +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = .1) +
  # Build the legend labels from the actual dose values (0, 0.5, 1, 2, 4 in
  # the data) rather than a hard-coded vector that does not match them.
  scale_fill_discrete(labels = function(x) paste0(x, "g")) +
  geom_text(aes(y = ymax, label = groups), vjust = 0, nudge_y = 1) +
  labs(
    title = "Auswirkung von Dünger auf die Höhe von Pflanzen",
    x = "Dünger in g", y = "Höhe in cm"
  )

Create bar plot in ggplot2 - Place data frame values instead of count

I'd like to place this data onto a bar plot using ggplot2
where the column "Clades" would be placed on the X axis and the values from each column (such as the values of 19A, for example) would be place on Y axis
I'm trying something like this:
# NOTE(review): `cols` holds column *names* taken from snv_data, not a column of `df`,
# and geom_bar() counts observations by default - presumably why counts are drawn instead of the stored values.
cols = as.vector(names(snv_data)[2:19])
ggplot(df, aes(x=cols)) + geom_bar()
But I keep getting this:
I'm new to ggplot2 so any help is very welcome!
I'm doing this to try and get 7 plots (one for each column such as 19A, 20A, 20B, etc) where each plot would have the Clades on the X-axis and each value from each column as the "counts" on the Y-axis
dput:
structure(list(Clades = c("C.T", "A.G", "G.A", "G.C", "T.C",
"C.A", "G.T", "A.T", "T.A", "T.G", "A.C", "C.G", "A.del", "TAT.del",
"TCTGGTTTT.del", "TACATG.del", "AGTTCA.del", "GATTTC.del"), `19A` = c(413,
93, 21, 0, 49, 9, 238, 13, 3, 1, 0, 4, 1, 0, 0, 0, 0, 0), `20A` = c(7929,
1920, 1100, 419, 1025, 124, 3730, 124, 22, 45, 64, 17, 8, 19,
23, 39, 0, 0), `20B` = c(5283, 1447, 2325, 1106, 336, 117, 946,
137, 35, 53, 123, 11, 9, 10, 21, 1, 0, 0), `20E (EU1)` = c(13086,
1927, 650, 1337, 1864, 96, 2967, 243, 69, 92, 115, 1486, 27,
5, 0, 1, 0, 0), `20I (Alpha, V1)` = c(71142, 12966, 12047, 15587,
14935, 15382, 11270, 12211, 5284, 4273, 430, 99, 5674, 4536,
4974, 4592, 0, 0), `20J (Gamma, V3)` = c(2822, 654, 883, 409,
501, 213, 843, 399, 203, 27, 429, 198, 1, 0, 197, 0, 0, 0), `21J (Delta)` = c(166003,
49195, 26713, 1399, 25824, 15644, 95967, 2011, 329, 11034, 716,
21087, 10532, 198, 0, 14, 9809, 10503)), class = "data.frame", row.names = c("C.T",
"A.G", "G.A", "G.C", "T.C", "C.A", "G.T", "A.T", "T.A", "T.G",
"A.C", "C.G", "A.del", "TAT.del", "TCTGGTTTT.del", "TACATG.del",
"AGTTCA.del", "GATTTC.del"))
To add to the previous answer, here is how you can get 7 plots (1 for each Clade, which is how I interpreted the question) using facet_wrap():
# Long format: every column except Clades is stacked into the pair
# `name` (former column name) / `value` (former cell content).
df <- pivot_longer(df, cols = -Clades)

# One panel per former column, each with its own y scale; bar heights are the
# stored values. The x tick labels are hidden - the fill legend identifies the
# bars instead.
ggplot(df, aes(x = Clades, y = value)) +
  geom_col(aes(fill = Clades)) +
  facet_wrap(~name, scales = "free_y") +
  theme(axis.text.x = element_blank())
As cazman said in the comments, you need to get your data in long form for it to work with ggplot2 (efficiently).
First, use pivot_longer(), and then use ggplot2:
library(tidyverse)
# Reshape to long format inline, then draw one stacked bar per Clades value,
# filled by the column each value came from.
ggplot(
  pivot_longer(dat, cols = -Clades),
  aes(x = Clades, y = value, fill = name)
) +
  geom_col()

Having trouble rearranging multiple GGPLOT2 graphs and moving/resizing scale

I am working with a dataset consisting of different plant genotypes, rates of fertilizer applications, and 5 different measurements. I am using ggplot2 to produce multiple bar graphs, and then using the gridExtra package to combine multiple graphs onto a single page. The trouble I am having involves moving and resizing the scale so that there is only one scale for each of my graphs, and I would like to move it to the lower right corner of the graph. The data/code below should better explain what I mean.
Packages/Dataset
#Open packages
library(dplyr)
library(ggplot2)
library(gridExtra)
#Dataset
plantdata <- data.frame(genotype = c(1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
2,
2,
1,
3,
3,
3,
1,
3,
2,
2,
1,
1,
1,
2,
2,
1,
3,
3,
3,
3,
2,
1,
2,
1),
rate=c(1,
2,
3,
4,
1,
2,
3,
4,
1,
2,
3,
4,
2,
4,
1,
1,
3,
2,
3,
4,
1,
3,
4,
2,
2,
1,
3,
3,
4,
1,
3,
2,
4,
1,
2,
4),
measure1=c(958,
309,
750,
43,
20,
868,
905,
674,
64,
151,
677,
144,
803,
485,
707,
881,
684,
222,
399,
507,
4,
690,
831,
574,
104,
238,
378,
897,
63,
154,
582,
641,
750,
855,
194,
55),
measure2=c(359,
728,
180,
614,
241,
989,
117,
101,
95,
156,
227,
355,
597,
50,
636,
912,
149,
862,
897,
601,
176,
7,
182,
214,
453,
569,
124,
113,
969,
781,
713,
613,
800,
334,
435,
748),
measure3=c(639,
304,
891,
317,
869,
901,
723,
267,
837,
923,
171,
991,
107,
309,
733,
705,
819,
807,
346,
447,
891,
957,
359,
323,
846,
944,
400,
548,
327,
48,
677,
624,
564,
854,
658,
343),
measure4=c(805,
24,
624,
675,
261,
437,
601,
129,
733,
172,
746,
586,
142,
243,
103,
779,
612,
870,
84,
881,
850,
456,
255,
52,
228,
492,
556,
66,
670,
682,
736,
178,
568,
501,
229,
500),
measure5=c(667,
105,
565,
724,
238,
861,
299,
13,
171,
759,
755,
557,
739,
228,
870,
595,
793,
790,
572,
590,
365,
974,
550,
766,
441,
265,
245,
909,
150,
88,
473,
245,
340,
378,
998,
121))
Function for standard error of the mean:
# Standard error of the mean: sd(x) / sqrt(n).
#
# x:     numeric vector
# na.rm: if TRUE, missing values are dropped before computing; the default
#        (FALSE) keeps the original behaviour, where any NA gives NA.
# Returns a single numeric value.
sem <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}
Generating the graphs:
# Build the dodged bar chart for one measurement column.
#
# data:    data frame with the columns `genotype`, `rate` and the column
#          named by `measure`
# measure: name of the measurement column as a string, e.g. "measure1"
# title:   plot title
#
# Returns a ggplot object. Bars show the mean per genotype x rate
# combination; the error bar runs from the mean up to mean + SEM (upper
# whisker only, as in the original plots). Relies on sem() defined above.
plot_measure <- function(data, measure, title) {
  # Mean and SEM are kept in ONE summary table so that aes() can refer to
  # plain columns instead of reaching into separate data frames with `$`.
  summary_df <- data %>%
    group_by(genotype, rate) %>%
    summarise(
      avg = mean(.data[[measure]]),
      se = sem(.data[[measure]]),
      .groups = "drop"
    )

  ggplot(summary_df, aes(x = genotype, y = avg, fill = factor(rate))) +
    geom_bar(
      stat = "identity", width = 0.6, position = "dodge", col = "black"
    ) +
    geom_errorbar(
      aes(ymin = avg, ymax = avg + se),
      width = 0.2, position = position_dodge(0.6)
    ) +
    # A single fill scale: a scale_fill_discrete() placed before this one
    # would simply be replaced by it.
    scale_fill_brewer(
      palette = "PRGn", name = "rate", labels = c("1", "2", "3", "4")
    ) +
    xlab("Genotype") +
    ylab("Measurement") +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0.5))
}

# One graph per measurement
g1 <- plot_measure(plantdata, "measure1", "Plant Measurement 1")
g2 <- plot_measure(plantdata, "measure2", "Plant Measurement 2")
g3 <- plot_measure(plantdata, "measure3", "Plant Measurement 3")
g4 <- plot_measure(plantdata, "measure4", "Plant Measurement 4")
g5 <- plot_measure(plantdata, "measure5", "Plant Measurement 5")
Then, I arranged all the graphs onto one page, as follows:
# Lay the five plots out on a 2 x 3 grid.
grid.arrange(
  g1, g2, g3, g4, g5,
  nrow = 2,
  ncol = 3
)
The output:
Because the requirements for this graph are that it needs to be exported in a smaller format, I would like to re-arrange the scale so that there is only one scale for the whole combined image, and I would like to increase the size of it. See below:
How can I do this? If there is a better way to generate this than the way I did it, I am open to learning how to do it.
Consider the ggpubr package and in there the ggarrange function with the argument common.legend = TRUE.
library(ggplot2)
library(ggpubr)

# Toy data: 100 random (x, y) points split into 5 groups of 20.
toy_data <- function() {
  data.frame(x = rnorm(100), y = rnorm(100), group = gl(5, 20))
}

# Scatter plot of one toy data set, coloured by group.
toy_plot <- function(data) {
  ggplot(data, aes(x = x, y = y, color = group)) +
    geom_point()
}

a <- toy_data()
b <- toy_data()
c <- toy_data()

p1 <- toy_plot(a)
p2 <- toy_plot(b)
p3 <- toy_plot(c)

# common.legend = TRUE draws one shared legend for all three panels.
ggarrange(p1, p2, p3, common.legend = TRUE)
Documentation: https://rpkgs.datanovia.com/ggpubr/reference/ggarrange.html

Overlapping two graphs with different y-axis in ggplot

I want to overlap two plots with different y-axis scales. I use stat_count() and geom_line. However, geom_line doesn't appear on the plot.
I use the following code.
library(ggplot2)
# NOTE(review): geom_line() maps the raw Views values to y, while the y scale is
# limited to c(0, 20). The Views values in X1 are far above 20, which is
# presumably why the line does not show up - sec.axis adds a second axis but the
# data still have to be rescaled to the primary axis by hand.
ggplot(X1, aes(x = Week)) +
stat_count() +
scale_x_continuous(breaks = seq(from = 0, to = 21, by = 1))+
scale_y_continuous(
name = expression("Count"),
limits = c(0, 20),
sec.axis = sec_axis(~ . * 15000 / 20, name = "Views"))+
geom_line(aes(y = Views), inherit.aes = T)
Here is the reproducible example of my data frame X1.
structure(list(Views = c(1749, 241, 309, 326, 237, 276, 2281,
1573, 10790, 1089, 1732, 3263, 2601, 2638, 2929, 3767, 2947,
65, 161), Week = c(1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8, 8, 8,
8, 9, 10, 10, 10)), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"))
Could you help me to put geom_line on the plot, please?
You also have to adjust the y values so that they fit inside the limits of the primary y-axis, i.e. apply the inverse of the transformation used for the secondary y-axis inside geom_line. Try this:
X1 <- structure(list(Views = c(1749, 241, 309, 326, 237, 276, 2281,
1573, 10790, 1089, 1732, 3263, 2601, 2638, 2929, 3767, 2947,
65, 161), Week = c(1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8, 8, 8,
8, 9, 10, 10, 10)), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"))
library(ggplot2)
# Number of views represented by one unit of the primary (count) axis.
# Defined once so that the rescaling in geom_line() and the back-transformation
# in sec_axis() cannot drift apart.
views_per_count <- 15000 / 20

ggplot(X1, aes(x = Week)) +
  stat_count() +
  scale_x_continuous(breaks = seq(from = 0, to = 21, by = 1)) +
  scale_y_continuous(
    name = "Count",
    limits = c(0, 20),
    # secondary axis: convert primary-axis units back to views
    sec.axis = sec_axis(~ . * views_per_count, name = "Views")
  ) +
  # squeeze Views into the 0-20 range of the primary axis
  geom_line(aes(y = Views / views_per_count))
Created on 2020-05-21 by the reprex package (v0.3.0)
I also summarised the dataframe to improve the interpretation of the week 5 spike and plotted separate layers
x1 <- structure(list(Views = c(1749, 241, 309, 326, 237, 276, 2281,
1573, 10790, 1089, 1732, 3263, 2601, 2638, 2929, 3767, 2947,
65, 161), Week = c(1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 8, 8, 8,
8, 9, 10, 10, 10)), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"))
library(dplyr)   # %>%, group_by(), summarise()
library(ggplot2)

# Total views per week, so the line shows one value per week.
x2 <- x1 %>%
  group_by(Week) %>%
  summarise(Views = sum(Views))

# Axis ranges are defined BEFORE the plot. Written as positional arguments
# inside scale_y_continuous() they would be taken as `breaks` and
# `minor_breaks` instead of setting any limits.
ylim.prim <- c(0, 20)      # primary axis: number of observations
ylim.sec <- c(0, 15000)    # secondary axis: views
views_per_count <- diff(ylim.sec) / diff(ylim.prim)

ggplot() +
  # views rescaled to the primary axis
  geom_line(
    data = x2,
    mapping = aes(x = Week, y = Views / views_per_count)
  ) +
  geom_bar(data = x1, mapping = aes(x = Week), stat = "count") +
  scale_x_continuous(breaks = seq(from = 0, to = 21, by = 1)) +
  scale_y_continuous(
    name = "Count",
    limits = ylim.prim,
    # secondary axis: convert primary-axis units back to views
    sec.axis = sec_axis(~ . * views_per_count, name = "Views")
  )

I would like to plot two data frames on a single plot

I am fairly new to R and am attempting to plot data frames simultaneously using ggplot2.
I have two data frames.
One is called WorkSchedMonday and consist of 96 rows and 4 columns.
structure(c(9, 9, 9, 9, 18, 18, 36, 36, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove")))
The other is called WorkSchedTuesday and consist of 96 rows and 4 columns.
structure(c(0, 0, 0, 0, 9, 9, 27, 27, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivTuesdayAtHome",
"WorkSchedAndIndivTuesdayAtSingleWorkPlace", "WorkSchedAndIndivTuesdayAtVarietyOfPlaces",
"WorkSchedAndIndivTuesdayWorkingOnTheMove")))
Using the following code, I plotted the 2 data frames.
# Bind the four Monday and four Tuesday schedule columns side by side into one matrix.
# NOTE(review): the eight names are used as free-standing objects here; above they appear only as column names - confirm they exist in the workspace.
WorkSchedWeek<-as.matrix(cbind(WorkSchedAndIndivMondayAtHome,WorkSchedAndIndivMondayAtSingleWorkPlace,WorkSchedAndIndivMondayAtVarietyOfPlaces, WorkSchedAndIndivMondayWorkingOnTheMove, WorkSchedAndIndivTuesdayAtHome,WorkSchedAndIndivTuesdayAtSingleWorkPlace,WorkSchedAndIndivTuesdayAtVarietyOfPlaces, WorkSchedAndIndivTuesdayWorkingOnTheMove))
####
# Long format; the plot below uses the resulting columns Var1, Var2 and value.
# NOTE(review): melt() is not a base R function - presumably reshape2 is attached.
melted_WorkSchedWeek<- melt(WorkSchedWeek)
# Filled columns per Var1, coloured by Var2; position = "fill" rescales every column to a total of 1.
plot<-ggplot(melted_WorkSchedWeek) + geom_col(aes(x = Var1,y = value,fill = Var2),position = "fill") + theme(legend.position="right", axis.text.x = element_text(angle = 90, hjust = 1))
# Display the plot with axis and legend titles.
plot + labs(x="Time", y="Probabilities", colour="Work schedules", fill="Work schedules")
However I would like to create the above plot using ggplot (or lattice) . On x axis is time (0400 till 0345 _ 24hours) per days (Monday and Tuesday), y axis probability distributions. The plot is filled with work schedules values. Can somebody help me? Thanks
You can use facet_grid to make two graphs side by side but sharing an axis. But this requires you to first merge your two dataframes.
To do this we standardize your variables, add a day column, a time column and then use rbind:
WorkSchedMonday = data.frame(structure(c(9, 9, 9, 9, 18, 18, 36, 36, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove"))))
# Short, day-independent column names, and the time stamps (so far only row
# names) stored as a regular column.
names(WorkSchedMonday) <- c("AtHome", "SingleWork", "Variety", "OnTheMove")
WorkSchedMonday[["time"]] <- rownames(WorkSchedMonday)
WorkSchedTuesday = data.frame(structure(c(0, 0, 0, 0, 9, 9, 27, 27, 36, 36, 64, 80, 96, 96,
112, 128, 168, 168, 296, 312, 14, 14, 14, 21, 21, 21, 21, 35,
49, 49, 12, 12, 6, 6, 0, 0, 0, 0, 6, 6), .Dim = c(10L, 4L), .Dimnames = list(
c("04:00", "04:15", "04:30", "04:45", "05:00", "05:15", "05:30",
"05:45", "06:00", "06:15"), c("WorkSchedAndIndivMondayAtHome",
"WorkSchedAndIndivMondayAtSingleWorkPlace", "WorkSchedAndIndivMondayAtVarietyOfPlaces",
"WorkSchedAndIndivMondayWorkingOnTheMove"))))
# Same short column names and time column for Tuesday.
names(WorkSchedTuesday) <- c("AtHome", "SingleWork", "Variety", "OnTheMove")
WorkSchedTuesday[["time"]] <- rownames(WorkSchedTuesday)

# Tag every row with its day, then stack both days into one data frame.
WorkSchedMonday[["day"]] <- "Monday"
WorkSchedTuesday[["day"]] <- "Tuesday"
WorkSched <- rbind(WorkSchedMonday, WorkSchedTuesday)
With that done, you can melt your dataframe like you did before and run the same ggplot, but with facet_grid along the variable that you want your graph to be separated by (day).
# Long format: one row per time / day / schedule type. melt() is called with
# its namespace because no package providing it is attached in this snippet.
WorkSched_melt <- reshape2::melt(WorkSched, id.vars = c("time", "day"))

# Filled columns per time slot (position = "fill" rescales each column to a
# total of 1), one panel per day.
ggplot(WorkSched_melt, aes(x = time, y = value, fill = variable)) +
  geom_col(position = "fill") +
  facet_grid(. ~ day) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
As a general rule, avoid using really big and clunky variable names, and also avoid having a necessary variable (in this case, time) as your row name.
Here is a solution with the data preparation code done with package dplyr.
library(ggplot2)
library(dplyr)
# Put both days side by side and turn the row names (time of day) into a
# regular column so they survive the reshape.
WorkSchedWeek <- cbind(WorkSchedMonday, WorkSchedTuesday)
WorkSchedWeek <- as.data.frame(WorkSchedWeek)
WorkSchedWeek <- cbind.data.frame(
  Hour = row.names(WorkSchedWeek),
  WorkSchedWeek
)

# Long format: one row per Hour / original column name.
melted_WorkSchedWeek <- reshape2::melt(WorkSchedWeek, id.vars = "Hour")

melted_WorkSchedWeek %>%
  mutate(
    # drop the common prefix, leaving e.g. "MondayAtHome"
    variable = sub("^WorkSchedAndIndiv", "", variable),
    # first three letters identify the weekday ("Mon", "Tue"); the column is
    # called Day because that is what it holds
    Day = sub("(^.{3}).*", "\\1", variable),
    # everything after "...day" is the schedule type, e.g. "AtHome"
    variable = sub("^.*day", "", variable)
  ) %>%
  ggplot(aes(x = Hour, y = value, fill = variable)) +
  geom_col(position = "fill") +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  facet_wrap(~ Day)

Resources