I have a plot where there are 4 groups (bars 1 + 2, bars 3 + 4, etc), but there's no indication of this in the dataset. How can I manually add space, add a shared group label (x-axis) and recolour the 4 groups?
Data (melted):
Current ggplot code:
# Reshape the results into long format (variable / value columns).
xdr <- melt(result)

# Bar chart of the per-variable mean with standard-error bars.
# `fun` replaces the deprecated `fun.y` argument, and the bar layer is added
# only once (it was previously added twice with identical settings).
ggplot(data = xdr, aes(x = variable, y = value)) +
  stat_summary(fun = mean, geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "errorbar") +
  # Restrict the visible y range to 0.6-0.85.
  coord_cartesian(ylim = c(0.6, 0.85))
I think you'll want to use mutate() to add the groups, in whatever way is appropriate for your data, and then facet_wrap() to make the kind of subgraphs you are talking about.
library(tidyverse)

# Example data: eight baseline measurements, one value each.
df <- tibble(
  variable = paste0("baseline", 1:8),
  value = c(0.730, 0.521, 0.762, 0.655, 0.604, 0.710, 0.528, 0.172)
)

# Pair consecutive rows (1-2, 3-4, ...) under a "Group <k>" label, then draw
# one facet per group, filled by group.
df %>%
  mutate(group = paste("Group", (row_number() + 1) %/% 2)) %>%
  ggplot(mapping = aes(x = variable, y = value, fill = group)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(group), nrow = 1, scales = "free_x")
Related
I obtained the two separate mean plots. Is there any simple way to combine them on a single plane with different line colours? Tricky part is each has a different scale, so I want to put one (lshare) scale on left hand side of y-axis and the other (va) on right side of y-axis.
# Yearly mean of each series as a line; `fun` replaces the deprecated `fun.y`.
p1 <- ggplot(df, aes(x = year, y = lshare)) +
  stat_summary(geom = "line", fun = mean)
p2 <- ggplot(df, aes(x = year, y = va)) +
  stat_summary(geom = "line", fun = mean)

# Show both plots side by side.
grid.arrange(p1, p2, ncol = 2)
Update2:
Combining all:
library(tidyverse)

mtcars %>%
  select(mpg, disp) %>%
  mutate(
    year = 1900:1931,
    # Rescale mpg by 10 so it shares the primary axis with disp; the
    # secondary axis below divides by 10 and therefore reads back the
    # original mpg values. Without this step the secondary axis would not
    # correspond to either series.
    mpg = mpg * 10
  ) %>%
  pivot_longer(c(mpg, disp)) %>%
  ggplot(aes(x = year, y = value, group = name, color = name)) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  scale_y_continuous(
    name = "my first y axis",
    sec.axis = sec_axis(~ . / 10, name = "my second y axis")
  )
Update: How to add a second y axis as requested:
library(tidyverse)

# Two series on one primary axis: mpg is multiplied by 10 for plotting and
# the secondary axis divides the primary axis by 10.
ggplot(
  data = mutate(select(mtcars, mpg, disp), year = 1900:1931),
  mapping = aes(x = year)
) +
  geom_line(mapping = aes(y = mpg * 10), colour = "red", size = 1) +
  geom_line(mapping = aes(y = disp), colour = "blue", size = 1) +
  scale_y_continuous(
    name = "my first y axis",
    sec.axis = sec_axis(~ . / 10, name = "my second y axis")
  )
First answer:
Here is a reproducible example with the mtcars dataset:
library(tidyverse)

# Long-format copy of mpg and disp with a synthetic year column, drawn as one
# mean line per variable.
ggplot(
  data = pivot_longer(
    mutate(select(mtcars, mpg, disp), year = 1900:1931),
    cols = c(mpg, disp)
  ),
  mapping = aes(x = year, y = value, colour = name, group = name)
) +
  stat_summary(geom = "line", fun = mean, size = 1)
As @jdobres commented, you can use facet_wrap(), as in the following example. Simply introduce a grouping factor to your data.frame.
set.seed(1)

# sample data: the same series stacked twice, labelled as groups 1 and 2
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare))
df$group <- factor(gl(2, length(year)))

# plot: one panel per group; `fun` replaces the deprecated `fun.y`
ggplot(df, aes(x = year, y = lshare, colour = group)) +
  stat_summary(geom = "line", fun = mean, size = 1) +
  facet_wrap(~ group)
Addition
As per your edit, which I saw after I posted this answer, facet_wrap() also works when you want to have two different y-axes. You just have to play a bit with the function that is specified within sec_axis().
set.seed(1)

# sample data: group 2 is group 1 plus a non-negative random offset
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
noise <- abs(rnorm(length(lshare), mean = 150, sd = 100))
df <- data.frame(
  year = rep(year, times = 2),
  lshare = c(lshare, lshare + noise)
)
df$group <- factor(gl(2, length(year)))

# value range (min, max) of each group
ylim_left <- range(df$lshare[df$group == 1])
ylim_right <- range(df$lshare[df$group == 2])

# slope and intercept of the linear map taking the group-2 range onto the
# group-1 range
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# plot: one panel per group, secondary axis is the inverse of that map
ggplot(data = df, mapping = aes(x = year, y = lshare, colour = group)) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  facet_wrap(vars(group)) +
  scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left) / axis_right))
Addition 2
If you would like to have the two lines in the same pane, you can use something along the following lines of code. Note, I use the same data as in the first addition (see above).
# set two limits: the (min, max) range of each group
ylim_left <- with(subset(df, group == 1), c(min(lshare), max(lshare)))
ylim_right <- with(subset(df, group == 2), c(min(lshare), max(lshare)))

# Linear map from the group-2 range onto the group-1 range:
#   left_value = axis_left + axis_right * right_value
axis_right <- diff(ylim_left) / diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]

# plot
ggplot(df, aes(colour = group)) +
  # Group 1 is drawn on the primary (left) axis as is.
  stat_summary(data = subset(df, group == 1),
               mapping = aes(x = year, y = lshare),
               geom = "line", fun = mean, size = 1) +
  # Group 2 is rescaled onto the primary axis; the secondary axis below
  # applies the inverse map, so its labels show the original group-2 values.
  stat_summary(data = subset(df, group == 2),
               mapping = aes(x = year, y = axis_left + axis_right * lshare),
               geom = "line", fun = mean, size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left) / axis_right)) +
  scale_colour_manual(name = 'My_groups',
                      values = c('1' = "blue4", '2' = "darkorange"),
                      labels = c('Group 1', 'Group 2'))
I am trying to create a plot in ggplot2 similar to this one:
Here is the code I am using:
# Score by Emotion, split by Group: flat violins, jittered points, and
# mean / standard-error summaries.
# Only deprecated arguments are modernised (`fun.y` -> `fun`,
# `guides(color = FALSE)` -> `guides(color = "none")`); layers, aesthetics and
# grouping are otherwise unchanged.
Dataset %>%
  group_by(Participant, Group, Emotion) %>%
  ggplot(aes(y = Score, x = Emotion, fill = Group, colour = Group)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .4) +
  geom_point(aes(y = Score, color = Group),
             position = position_jitter(width = .15), size = 3, alpha = 0.4) +
  # Mean line, grouped by Emotion.
  stat_summary(aes(y = Score, group = Emotion), fun = mean, geom = "line",
               size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48') +
  # Group means and standard-error bars, dodged by the same width.
  stat_summary(fun = mean, geom = 'pointrange', width = 0.2, size = 2,
               alpha = 1.2, position = position_dodge(width = 0.3)) +
  stat_summary(fun.data = mean_se, geom = 'errorbar', width = 0.25,
               size = 2.2, alpha = 1.2, linetype = "solid",
               position = position_dodge(width = 0.3)) +
  guides(color = "none") +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  ylim(0, 100) +
  graph_theme
What I am failing to do is set up the stat_summary(geom = 'line') to connect the green and orange means within each emotion on the x-axis. Could anyone give any pointers on this? I'd also like all the other features to stay the same if possible (e.g., I wouldn't like to use facet_grid or facet_wrap).
Thank you!
When I change the group argument in stat_summary to 'Group' instead of 'Emotion', means for each group are connected across emotions, but I can't figure out how to connect means of different groups within each emotion:
This is a tricky one because your line needs to connect points that have different x values but even if you jitter in the point layer, they still technically have the same x value so the line doesn't know how to connect them. What others have done is to manually add the jitter to force the points to have a different x position. For more inspiration check out this, this and this. Here's an example:
library(tidyverse)
set.seed(1)

# Simulated data: 3 emotions x 2 groups x 10 participants, random scores.
emotion <- c("anger", "fear", "sadness")
group <- letters[1:2]
participant <- 1:10
dat <- expand_grid(emotion, group, participant) %>%
  mutate(across(everything(), as.factor),
         # TRUE rather than T: T is an ordinary variable and can be reassigned
         score = sample(x = 1:100, size = nrow(.), replace = TRUE))

dat %>%
  # Numeric x position for the connecting line: the emotion's factor code,
  # shifted left for group "a" and right for group "b".
  mutate(new_emot = case_when(
    group == "a" ~ as.numeric(emotion) - 0.125,
    group == "b" ~ as.numeric(emotion) + 0.125
  )) %>%
  ggplot(aes(x = emotion, y = score)) +
  stat_summary(aes(color = group), fun = mean, geom = "point",
               position = position_dodge(width = 0.5)) +
  stat_summary(aes(color = group), fun.data = mean_se, geom = "errorbar",
               width = 0.5, position = position_dodge(width = 0.5)) +
  # Line between the two shifted group means within each emotion.
  stat_summary(aes(x = new_emot, group = emotion), fun = mean, geom = "line") +
  theme_bw()
Created on 2021-03-24 by the reprex package (v1.0.0)
Setting geom_line to the same position as pointrange and errorbar will solve the problem.
i.e.,
# Mean line dodged by the same width as the pointrange and errorbar layers.
# `fun` replaces the deprecated `fun.y`.
stat_summary(aes(y = Score, group = Emotion), fun = mean, geom = "line",
             size = 2.2, alpha = 1.2, width = 0.25, colour = 'gray48',
             position = position_dodge(width = 0.3))
I am plotting the distributions of two variables on a single histogram. I am interested in highlighting each distribution's mean value on that graph with a dotted line or something similar (ideally something that matches the color already set in the aes section of the code).
How would I do that?
This is my code so far.
# Overlaid semi-transparent histograms, one per type.
hist_plot <- ggplot(data, aes(x = value, fill = type, color = type)) +
  geom_histogram(position = "identity", alpha = 0.2) +
  labs(x = "Value", y = "Count", fill = "Type", title = "Title") +
  # "none" replaces the deprecated `FALSE` for hiding the colour legend.
  guides(color = "none")
Also, is there any way to show the count of n for each type on this graph?
I've made some reproducible code that might help you with your problem.
library(tidyverse)

# Generate some random data: 50 values per type
df <- data.frame(value = c(runif(50, 0.5, 1), runif(50, 1, 1.5)),
                 type = c(rep("type1", 50), rep("type2", 50)))

# Calculate the mean and the number of observations for each type
stats <- df %>%
  group_by(type) %>%
  summarise(mean = mean(value),
            n = n())

# Make the ggplot
ggplot(df, aes(x = value, fill = type, color = type)) +
  geom_histogram(position = "identity", alpha = 0.2) +
  labs(x = "Value", y = "Count", fill = "Type", title = "Title") +
  # "none" replaces the deprecated `FALSE` for hiding the colour legend.
  guides(color = "none") +
  # Vertical line at each type's mean, coloured like its histogram.
  geom_vline(data = stats, aes(xintercept = mean, color = type), size = 2) +
  # Count label at each mean. The y axis is a count, so the label is pinned to
  # the top of the panel (y = Inf, nudged down with vjust) instead of being
  # placed at max(df$value), which is an x-axis quantity.
  geom_text(data = stats, aes(x = mean, y = Inf, label = n),
            vjust = 1.5,
            size = 10,
            color = "black")
If things go as intended, you'll end up with something akin to the following plot.
histogram with means
# Example data: three grades (each with its own question id) in two classes.
data <- data.frame(
  grade = rep(c(1, 2, 3), times = 2),
  class = rep(c("a", "b"), each = 3),
  size = c(1, 1, 2, 2, 2, 1),
  var = rep(c("q33", "q35", "q39"), times = 2),
  score = c(5, 8, 7, 3, 7, 5)
)
My data have many group variables.
First I want to just plot 'score' by 'grade' with a line
# library() attaches one package per call; in `library(reshape2, ggplot2)` the
# second argument is matched to `help`, so ggplot2 would not be attached.
library(reshape2)
library(ggplot2)

# Single line through every row of score against grade.
ggplot(data, aes(x = grade, y = score)) +
  geom_line()
It gives a funny graph because I have 'grade' repeated for different classes and sizes.
If I take a subset of my data then the graph looks ok.
# Same line plot, restricted to the rows where size is 1.
ggplot(data[data$size == 1, ], aes(grade, score)) +
  geom_line()
So I wonder how can I plot my data 'score' by 'grade' for ALL combinations without the graph somehow combining all values?
Here is one approach. You can plot score vs. grade, and use stat_summary to add a line going through mean at each grade, and a ribbon that contains the 95% confidence interval. Is this what you had in mind?
library(ggplot2)

# Dashed line through the mean score at each grade, with a translucent ribbon
# from mean_cl_normal at conf.int = 0.95.
ggplot(data, aes(grade, score)) +
  stat_summary(fun = mean, geom = "line", linetype = "dashed") +
  stat_summary(
    fun.data = mean_cl_normal,
    fun.args = list(conf.int = 0.95),
    geom = "ribbon",
    alpha = 0.1
  ) +
  scale_x_continuous(breaks = data$grade)
Plot
Alternatively, you can plot points for mean values at each grade and standard error bars.
library(tidyverse)

# Per-grade mean, standard deviation, count and standard error of score,
# plotted as connected points with +/- one standard error bars.
data %>%
  group_by(grade) %>%
  summarise(
    mean_score = mean(score),
    SD = sd(score),
    n = n(),
    SE = SD / sqrt(n)
  ) %>%
  ggplot(aes(grade, mean_score)) +
  geom_point() +
  geom_line() +
  geom_errorbar(
    width = 0.1,
    mapping = aes(ymin = mean_score - SE, ymax = mean_score + SE)
  ) +
  scale_x_continuous(breaks = data$grade)
Plot
You could use facet_wrap(~class+size) this will give one plot per combination.
In my dat2 file I have a series of body measurements for various subjects. These body measurements are bicep, tricep, suprailiac, sub scapular....ffm (fat free mass). I have taken these measurements at 0,6,12 weeks. The subjects were on either treatment 'a' or treatment 'b'.
I would like to create multiple graphs for both treatments where Y = Body Measurement and X = Time. I know how to do this for just one variable like "weight".
# Mean weight over time with mean_cl_normal error bars, one panel per treatment.
# `fun` replaces the deprecated `fun.y`.
line <- ggplot(dat2, aes(time, weight))
line +
  stat_summary(fun = mean, geom = "point") +
  # group = 1 joins the means at all time points with a single line
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_wrap(~ treatment, nrow = 4) +
  labs(x = "Time", y = "Weight") +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))
I tried the following code to do this for multiple variables and to print multiple graphs.
# Column selection on its own: the result is not assigned or piped further.
dat2 %>% select(code:solidfatstotallog, time, bicep, tricep, subscapular, suprailiac, weight, wc, bia, bmi, wthr, X.fat, fm, ffm, height, density)

# Mean of every body measurement over time.
# Fixes relative to the first attempt:
#   * the pipe after select() was missing, so gather() received no data;
#   * y is mapped to `value` (the measurements), not `body_measure` (their names);
#   * panels are split by measurement as well as treatment, with free y scales;
#   * `fun` replaces the deprecated `fun.y`.
dat2 %>%
  select(code, treatment, time, bicep:ffm) %>%
  gather(body_measure, value, -c(code, treatment, time)) %>%
  ggplot(aes(time, value)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_grid(body_measure ~ treatment, scales = "free_y") +
  labs(x = "Time", y = "Body_Measure") +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))
However, this just creates one graph of Y = weight and X = Time. With different labels where Y is now body_measure. What am I missing here?
See example picture for Weight by Time. I would like to create multiple such graphs, ideally on a single page.
So I was able to create a facet_grid, but the graph is not nice to look at. I used the following code.
# Mean of each body measurement over time in a treatment-by-measure grid.
# `fun` replaces the deprecated `fun.y`.
dat2 %>%
  select(code, treatment, time, bicep:ffm, -c(density, height)) %>% # drop irrelevant variables
  gather(measure, score, -c(code, treatment, time)) %>% # data into long form
  ggplot(aes(time, score)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_grid(treatment ~ measure) +
  labs(x = "Time", y = "Body_Measure") +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 6, 12))