Means barplot with confidence intervals? - r

I have a large dataset, where I have a variable Q1with 7 response/value options, and two groups (One and Two).
Q1<- c(6,4,2,4,7,1,4,7,4,5,4,4,2,6,1)
Group<- c(One, Two, One, Two,Two, Two, One, One, One, One, Two, One, One, Two, Two)
I'm trying to convert a simple frequency plot (number of observations in each response category by group) and instead plot the means with confidence intervals (as in the image below).
df1<- filter(df, Q1!="-99",df$Group=="One"|df$Group=="Two")
ggplot(data = df1, aes(x = Q1)) +
geom_bar(aes(fill = df1$Group), position = "dodge", stat="summary", fun.y="mean") + labs(title="Graph Title")
When i run this, I get the following error:
Error: stat_summary requires the following missing aesthetics: y
Any ideas are appreciated!

Here is an example. You need to pre-compute CIs yourself:
library(dplyr)
library(ggplot2)
set.seed(123)
df <- data.frame(g = c(rep("A",10),rep("B",10),rep("C",10)),
val = c(rnorm(10,100,5), rnorm(10,200,10), rnorm(10,300,50)))
df <- df %>% group_by(g) %>% summarise(m = mean(val),
stdv = sd(val))
ggplot(df, aes(g,m,fill=g)) +
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=m-stdv, ymax=m+stdv), width=.2,
position=position_dodge(.9))
Output
UPDATE
df <- data.frame(
Q1 = c(6,4,2,4,7,1,4,7,4,5,4,4,2,6,1),
Group = sample(c("One","Two"), 15, TRUE),
stringsAsFactors = FALSE)
df <- df %>% group_by(Group) %>% summarise(m = mean(Q1),
stdv = sd(Q1))
ggplot(df, aes(Group,m,fill=Group)) +
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=m-stdv, ymax=m+stdv), width=.2,
position=position_dodge(.9))

what about something like this
`ggplot(df.df, aes(x=category, color=group)) +
stat_summary(aes(y = value),
fun.y = mean, na.rm = TRUE,
geom = "bar",
size = 3) +
stat_summary(aes(y = value),
fun.data = mean_se, na.rm = TRUE,
geom = "errorbar",
width = 0.2) `

Related

How to put two mean plots on a single plane with different scale?

I obtained the two separate mean plots. Is there any simple way to combine them on a single plane with different line colours? Tricky part is each has a different scale, so I want to put one (lshare) scale on left hand side of y-axis and the other (va) on right side of y-axis.
p1 <- ggplot(df, aes(x = year, y = lshare)) + stat_summary(geom = "line", fun.y = mean)
p2 <- ggplot(df, aes(x = year, y = va)) + stat_summary(geom = "line", fun.y = mean)
grid.arrange(p1, p2, ncol = 2)
Update2:
Combining all:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
pivot_longer(
c(mpg, disp)
) %>%
ggplot(aes(x=year, y=value, group=name, color=name))+
stat_summary(fun =mean, geom="line", size=1) +
scale_y_continuous(
name = "my first y axis",
sec.axis = sec_axis(~./10, name="my second y axis")
)
Update: How to add secodn y axis as requested:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
ggplot(aes(x=year))+
geom_line(aes(y=mpg*10), size=1, color="red")+
geom_line(aes(y=disp), size=1, color="blue") +
scale_y_continuous(
name = "my first y axis",
sec.axis = sec_axis(~./10, name="my second y axis")
)
First answer:
Here is a reproducible example with the mtcars dataset:
library(tidyverse)
mtcars %>%
select(mpg, disp) %>%
mutate(year = 1900:1931) %>%
pivot_longer(
c(mpg, disp)
) %>%
ggplot(aes(x=year, y=value, group=name, color=name))+
stat_summary(fun =mean, geom="line", size=1)
As #jdobres commented, you can use facet_wrap(), like in the following example. Simply introduce a grouping factor to your data.frame.
set.seed(1)
# sample data
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare))
df$group <- factor(gl(2, length(year)))
# plot
ggplot(df, aes(x = year, y = lshare, colour = group)) +
stat_summary(geom = "line", fun.y = mean, size = 1) +
facet_wrap(~ group)
Addition
As per your edit, which I saw after I posted this answer, facet_wrap() also works when you want to have two different y-axes. You just have to play a bit with the function that is specified within sec_axis().
set.seed(1)
# sample data
year <- 1:20
lshare <- 0.50 - 0.02 * year + rnorm(length(year), sd = 3)
noise <- abs(rnorm(length(lshare), mean = 150, sd = 100))
df <- data.frame(year = c(year, year), lshare = c(lshare, lshare + noise))
df$group <- factor(gl(2, length(year)))
# set two limits
ylim_left <- with(subset(df, group == 1), c(min(lshare), max(lshare)))
ylim_right <- with(subset(df, group == 2), c(min(lshare), max(lshare)))
axis_right <- diff(ylim_left)/diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]
# plot
ggplot(df, aes(x = year, y = lshare, colour = group)) +
stat_summary(geom = "line", fun = mean, size = 1) +
facet_wrap(~ group) +
scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left)/axis_right))
Addition 2
If you would like to have the two lines in the same pane, you can use something along the following lines of code. Note, I use the same data as in the first addition (see above).
# set two limits
ylim_left <- with(subset(df, group == 1), c(min(lshare), max(lshare)))
ylim_right <- with(subset(df, group == 2), c(min(lshare), max(lshare)))
axis_right <- diff(ylim_left)/diff(ylim_right)
axis_left <- ylim_left[1] - axis_right * ylim_right[1]
# plot
ggplot(df, aes(colour = group)) +
stat_summary(data = subset(df, group == 1),
mapping = aes(x = year, y = lshare),
geom = "line", fun = mean, size = 1) +
stat_summary(data = subset(df, group == 2),
mapping = aes(x = year, y = lshare),
geom = "line", fun = mean, size = 1) +
scale_y_continuous(sec.axis = sec_axis(~ (. - axis_left)/axis_right)) +
scale_colour_manual(name = 'My_groups',
values = c('1' = "blue4", '2' = "darkorange"),
labels = c('Group 1', 'Group 2'))

Horizontal percent total stacked bar chart with labels on each end

I have a simple data frame which has the probabilities that an id is real and fake, respectively:
library(tidyverse)
dat <- data.frame(id = "999", real = 0.7, fake = 0.3)
I know that I can show this as a horizontal bar chart using the code below:
dat %>%
gather(key = grp, value = prob, -id) %>%
ggplot(aes(x = id, y = prob, fill = grp)) +
geom_bar(stat = "identity") +
coord_flip()
But I was wondering if there was a way to show this in the same way as shown below, with the class labels and probabilities on either end of the bar chart?
Many thanks
A straight forward, maybe somewhat cheeky workaround is to re-define your 0.
I added a few calls that are not strictly necessary, but make it look closer to your example plot.
library(tidyverse)
dat <- data.frame(id = "999", real = -0.7, fake = 0.3) # note the minus sign!
dat %>%
gather(key = grp, value = prob, -id) %>%
ggplot(aes(x = id, y = prob, fill = grp)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = stringr::str_to_title(paste0(grp, " (", as.character(100*abs(prob)), "%)"))),
hjust = c(1,0))+
coord_flip(clip = "off") +
scale_fill_brewer(palette = "Greys") +
theme_void() +
theme(aspect.ratio = .1,
plot.margin = margin(r = 3, l = 3, unit = "lines"))
Created on 2021-02-06 by the reprex package (v0.3.0)
I'm not sure this fully answers the question but I think it will improve the plot, can you try it out?
dat %>%
gather(key = grp, value = prob, -id) %>%
ggplot(aes(x = id, y = prob, fill = grp)) +
geom_bar(stat = "identity", position = "fill") +
scale_y_continuous("Proportion") +
scale_x_discrete("", expand = c(0,0)) +
scale_fill_identity() +
coord_flip()

geom_bar not displaying mean values

I'm currently trying to plot mean values of a variable pt for each combination of species/treatments in my experiments. This is the code I'm using:
ggplot(data = data, aes(x=treat, y=pt, fill=species)) +
geom_bar(position = "dodge", stat="identity") +
labs(x = "Treatment",
y = "Proportion of Beetles on Treated Side",
colour = "Species") +
theme(legend.position = "right")
As you can see, the plot seems to assume the mean of my 5N and 95E treatments are 1.00, which isn't correct. I have no idea where the problem could be here.
Took a stab at what you are asking using tidyverse and ggplot2 which is in tidyverse.
dat %>%
group_by(treat, species) %>%
summarise(mean_pt = mean(pt)) %>%
ungroup() %>%
ggplot(aes(x = treat, y = mean_pt, fill = species, group = species)) +
geom_bar(position = "dodge", stat = "identity")+
labs(x = "Treatment",
y = "Proportion of Beetles on Treated Side",
colour = "Species") +
theme(legend.position = "right") +
geom_text(aes(label = round(mean_pt, 3)), size = 3, hjust = 0.5, vjust = 3, position = position_dodge(width = 1))
dat is the actual dataset. and I calculated the mean_pt as that is what you are trying to plot. I also added a geom_text piece just so you can see what the results were and compare them to your thoughts.
From my understanding, this won't plot the means of your y variable by default. Have you calculated the means for each treatment? If not, I'd recommend adding a column to your dataframe that contains the mean. I'm sure there's an easier way to do this, but try:
data$means <- rep(NA, nrow(data))
for (x in 1:nrow(data)) {
#assuming "treat" column is column #1 in your data fram
data[x,ncol(data)] <- mean(which(data[,1]==data[x,1]))
}
Then try replacing
geom_bar(position = "dodge", stat="identity")
with
geom_col(position = "dodge")
If your y variable already contains means, simply switching geom_bar to geom_col as shown should work. Geom_bar with stat = "identity" will sum the values rather than return the mean.

fill and group bar graphs by different variables

I am trying to create faceted geom_bar graphs with the following charactaristics:
The proportion of each answer per question is shown
Each bar is colored according to the response
The plot is faceted by question
I seem to be able to do any two of the conditions, but not all 3.
Question:
Is there a way to facet and calculate proportions using one variable, but colour/fill based on another variable?
Code:
df <- data.frame(
Question = rep(c('A', 'B', 'C'), each = 5),
Resp = sample(c('Yes', 'No', 'Unsure', NA), 15, T, c(0.3,0.3,0.3,0.1)),
stringsAsFactors = F
)
# Plot 1: grouping by question to get the right proportions, but has no colour
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Question)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
# Plot 2: grouping by response to get colour, but has wrong proportions
ggplot(df, aes(x = Resp, fill = Resp)) +
stat_count(aes(y = ..prop.., group = Resp)) +
scale_y_continuous(labels = scales::percent_format()) +
facet_wrap(~ Question)
Outputs:
This is a "ggplot2-only" option:
ggplot(df, aes(x = Resp)) +
geom_bar(aes(y = ..prop.., group = Question, fill = factor(..x..)), position = "dodge") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_discrete(name = "Response", labels = c("No", "Unsure", "Yes", "NA")) +
facet_wrap(~ Question)
One way could be to calculate the proportions and then plot.
library(dplyr)
library(ggplot2)
df %>%
count(Question, Resp) %>%
group_by(Question) %>%
mutate(n = n/sum(n) * 100) %>%
ggplot() + aes(Resp, n, fill = Resp) +
geom_col() +
facet_wrap(~Question)
Plot without facet
df$n <- 1
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n))
ggplot(df, aes(x=factor(Question), y=n, fill=Resp)) + geom_col()
Plot with facet
df <- df %>% group_by(Question, Resp) %>% summarise(n = sum(n)) %>% mutate(prop = n/5)
ggplot(df, aes(x=factor(Resp), y=prop, fill=Resp)) + geom_col() + facet_wrap(~Question)

Draw a line on top of stacked bar_plot

I would like to draw a line (or making points) on top of my stacked bar_plots. As I have no real data points I can refer to (only the spereated values and not the sum of them) I don't know how I can add such line. The Code produce this plot:
I want to add this black line(my real data are not linear):
library(tidyverse)
##Create some fake data
data3 <- tibble(
year = 1991:2020,
One = c(31:60),
Two = c(21:50),
Three = c(11:40)
)
##Gather the variables to create a long dataset
new_data3 <- data3 %>%
gather(model, value, -year)
##plot the data
ggplot(new_data3, aes(x = year, y = value, fill=model)) +
geom_bar(stat = "identity",position = "stack")
You can use stat_summary and sum for the summary function:
ggplot(new_data3, aes(year, value)) +
geom_col(aes(fill = model)) +
stat_summary(geom = "line", fun.y = sum, group = 1, size = 2)
Result:
You could get sum by year and plot it with new geom_line
library(dplyr)
library(ggplot2)
newdata4 <- new_data3 %>%
group_by(year) %>%
summarise(total = sum(value))
ggplot(new_data3, aes(x = year, y = value, fill=model)) +
geom_bar(stat = "identity",position = "stack") +
geom_line(aes(year, total, fill = ""), data = newdata4, size = 2)

Resources