Using group argument in aes() in ggplot2 - r

I am trying to use the "group" argument in aes() in ggplot2, and I am not sure why it is not working as I currently have it.
I would like an image that groups my "maskalthalf" variable in the way that this image uses "sex" (found here).
This is what my graph currently looks like.
This is the code I have so far.
# Bar chart of `mean` by `message`, with `maskalthalf` supplied as the group
# aesthetic, rounded value labels, and mean +/- se error bars.
# NOTE(review): "003900" has no leading "#", so it is not a hex colour
# specification; confirm whether "#003900" was intended.
ggplot(groups, aes(x = message, y = mean, group = factor(maskalthalf))) +
geom_bar(stat = "identity", width = 0.5, fill = "003900") +
geom_text(aes(label = round(mean, digits = 1), vjust = -2)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2, position = position_dodge(.9)) +
labs(title = "Evaluations of Personal and General Convincingness") +
ylab("Rating") +
xlab("Personal evaluation or general evaluation") +
ylim(0, 8)
This is a sketch of what I am aiming for:
Data:
structure(list(maskalthalf = c("High", "High", "Low", "Low"),
message = c("General", "Personal", "General", "Personal"),
mean = c(4.79090909090909, 6.38181818181818, 4.69879518072289,
4.8433734939759), se = c(0.144452868727642, 0.104112130946133,
0.149182255019704, 0.180996951567937)), row.names = c(NA,
-4L), groups = structure(list(maskalthalf = c("High", "Low"),
.rows = structure(list(1:2, 3:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))

The image in your first example uses facets to group by variable. So you could try that:
# Draw one panel per `maskalthalf` level with facet_wrap() instead of relying
# on the `group` aesthetic. `df1` is assumed to hold the data posted in the
# question.
ggplot(df1, aes(x = message, y = mean)) +
  # Hex colours need the leading "#"; without it the string is not read as
  # the dark green #003900.
  geom_col(width = 0.5, fill = "#003900") +
  # `vjust` is a fixed offset, not a data mapping, so it is set outside aes().
  geom_text(aes(label = round(mean, digits = 1)), vjust = -2) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(title = "Evaluations of Personal and General Convincingness") +
  ylab("Rating") +
  xlab("Personal evaluation or general evaluation") +
  ylim(0, 8) +
  facet_wrap(~maskalthalf)

Related

How can I add a legend to this graph in R

I have made a graph from 2 different tables, red is users and blue is non users. How can I create a legend.
structure(list(Attribute = c("Nscore", "Escore", "Oscore", "Ascore",
"Cscore", "Impulsivity", "SS"), Mean = c(0.519519745762712, -0.224147033898305,
0.345051694915254, -0.542761016949153, -0.432290169491526, 0.573723898305084,
0.625454406779661), lower_bound = c(0.345515567755788, -0.421929253136834,
0.173007836159723, -0.743825778750619, -0.620318735695037, 0.417301607369938,
0.461852381381636), upper_bound = c(0.693523923769636, -0.0263648146597761,
0.517095553670785, -0.341696255147686, -0.244261603288014, 0.730146189240231,
0.789056432177685)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-7L))
structure(list(Attribute = c("Nscore", "Escore", "Oscore", "Ascore",
"Cscore", "Impulsivity", "SS"), Mean = c(-0.0346437351443125,
0.014794833050368, -0.0236125863044712, 0.0359841765704582, 0.0284564233163561,
-0.0306152461799648, -0.0452792359932086), lower_bound = c(-0.0809079532901976,
-0.0313902162687121, -0.0700665170072849, -0.00972637107557454,
-0.0176800056429342, -0.0748829267378616, -0.089769617527101),
upper_bound = c(0.0116204830015727, 0.0609798823694481, 0.0228413443983426,
0.0816947242164909, 0.0745928522756465, 0.0136524343779321,
-0.000788854459316299)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L))
# Points and error bars for UserCI (red) and NonUserCI (blue) on shared axes.
# Both colours are set as constants outside aes().
# The last layer has no `data` argument, so it inherits UserCI and receives the
# NonUserCI bounds as plain vectors via `$`.
ggplot(UserCI, aes(Attribute, Mean)) +
geom_point() + geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), colour="red") +
geom_point(data = NonUserCI, aes(Attribute, Mean)) +
geom_errorbar(aes(ymin =NonUserCI$lower_bound, ymax = NonUserCI$upper_bound), colour = "blue")
If you want a legend, map an aesthetic instead of setting a constant: put color inside aes() and assign your desired colors and labels via scale_color_manual:
library(ggplot2)
# Each error-bar layer maps `color` to a constant key inside aes();
# scale_color_manual() then assigns the colour and the legend label per key.
ggplot(data = UserCI, mapping = aes(x = Attribute, y = Mean)) +
  geom_point() +
  geom_errorbar(
    mapping = aes(ymin = lower_bound, ymax = upper_bound, color = "user")
  ) +
  # x and y are inherited from the top-level mapping.
  geom_point(data = NonUserCI) +
  geom_errorbar(
    data = NonUserCI,
    mapping = aes(ymin = lower_bound, ymax = upper_bound, color = "nonuser")
  ) +
  scale_color_manual(
    values = c(user = "red", nonuser = "blue"),
    labels = c(user = "User", nonuser = "Non-User")
  )

Reordering variables using fct_relevel with ggplot in R

I am trying to use fct_relevel() to reorder variables in a graph. I have tried changing the column to a factor. I am unsure why my code isn't working. I need "Owned Panels" to show up in front of "Did Not Own Panels". I am also open to alternatives that don't rely on fct_relevel().
Graph code:
# Dodged bar chart of `mean` by `reason`, filled by `panels`, with error bars
# and rounded value labels.
# Note: fct_relevel() is closed right after `panels`, so the two level names
# are passed to mutate() rather than to fct_relevel().
groups %>%
mutate(panels = fct_relevel(panels), "Owned Panels", "Did Not Own Panels") %>%
ggplot(., aes(x=reason, y = mean, fill = panels)) +
geom_bar(stat = "identity", color = "black", position = position_dodge()) +
geom_errorbar(aes(ymin = mean - se, ymax = mean+se), width = .2, position = position_dodge(.9)) +
geom_text(aes(label = round(mean, digits =2)), position = position_dodge(width=1.0), vjust = -1.5) +
#facet_wrap(~dv) +
labs(title = ~ "Likelihood of solar panel installation after meeting ambassador",
y = "Likelihood of installing solar panels",
x = "Reason to install solar panels") +
scale_fill_discrete(name = "Ambassador solar\npanel ownership") +
scale_y_continuous(limits = c(1, 7), oob = scales::oob_squish)
Data:
structure(list(dv = c("behavior", "behavior", "behavior", "behavior"
), panels = c("Owned Panels", "Owned Panels", "Did Not Own Panels",
"Did Not Own Panels"), reason = c("Environment", "Money", "Environment",
"Money"), mean = c(5.15789473684211, 5.36065573770492, 4.85454545454545,
4.35483870967742), se = c(0.224988824122626, 0.194223670966034,
0.187878787878788, 0.210884132880012)), row.names = c(NA, -4L
), groups = structure(list(dv = c("behavior", "behavior"), panels = c("Did Not Own Panels",
"Owned Panels"), .rows = structure(list(3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
In the OP's code, the call to fct_relevel() was closed before the levels were specified, so the level names were passed to mutate() instead:
library(forcats)
library(dplyr)
library(ggplot2)
groups %>%
mutate(panels = fct_relevel(panels), "Owned Panels", "Did Not Own Panels")
                                  ^
Instead, it would be (also ungroup before doing this)
# Reorder `panels` so "Owned Panels" comes first, then draw the dodged bar
# chart. The level names go inside fct_relevel(), and the data are ungrouped
# first so the grouping column can be modified.
groups %>%
  ungroup() %>%
  mutate(
    panels = fct_relevel(panels, "Owned Panels", "Did Not Own Panels")
  ) %>%
  ggplot(aes(x = reason, y = mean, fill = panels)) +
  geom_col(color = "black", position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  # Same dodge width as the bars and error bars, so each label is centred
  # over its own bar.
  geom_text(
    aes(label = round(mean, digits = 2)),
    position = position_dodge(width = 0.9),
    vjust = -1.5
  ) +
  # facet_wrap(~dv) +
  labs(
    title = "Likelihood of solar panel installation after meeting ambassador",
    y = "Likelihood of installing solar panels",
    x = "Reason to install solar panels"
  ) +
  scale_fill_discrete(name = "Ambassador solar\npanel ownership") +
  # Squish out-of-bounds values to the limits so bars starting at 0 are kept.
  scale_y_continuous(limits = c(1, 7), oob = scales::oob_squish)

Changing lower limit of graph y-axis when x is a discrete variable without cutting off bottom of bars

I am trying to adjust the y-axis of this graph so that it starts at 1 instead of 0. What's the best way to do this?
The solution offered here cuts off the bottom of the graph. I would like for the bars to look pretty much identical to the graph below, but with the lower y-limit at 1 and each bar moved down 1 unit to match. I would like to preserve the small amount of gray space below each bar.
Code:
# Faceted bar chart of `mean` by `message` (one panel per
# `enviroattitudeshalf` level) with value labels, mean +/- se error bars and
# the y-axis fixed to 0-8. Both factors are releveled before plotting.
# NOTE(review): "003900" has no leading "#", so it is not a hex colour
# specification; confirm whether "#003900" was intended.
groups %>%
ungroup() %>%
mutate(message = fct_relevel(message, "Personal", "General"),
enviroattitudeshalf = fct_relevel(enviroattitudeshalf, "Low Environmental Attitudes", "High Environmental Attitudes")) %>%
ggplot(aes(x = message, y = mean)) +
geom_col(width = 0.5, fill = "003900") +
geom_text(aes(label = round(mean, digits = 1), vjust = -2)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2, position = position_dodge(.9)) +
labs(title = "Environment: Evaluations of Personal and General Convincingness",
y = "Rating",
x = "Personal evaluation or general evaluation") +
ylim(0, 8) +
facet_wrap(~enviroattitudeshalf)
Data:
structure(list(enviroattitudeshalf = c("Low Environmental Attitudes",
"Low Environmental Attitudes", "High Environmental Attitudes",
"High Environmental Attitudes"), message = c("General", "Personal",
"General", "Personal"), mean = c(3.89473684210526, 3.37894736842105,
4.43636363636364, 5.10909090909091), se = c(0.145460372156746,
0.19522803582675, 0.160549137262631, 0.171509247396541)), row.names = c(NA,
-4L), groups = structure(list(enviroattitudeshalf = c("High Environmental Attitudes",
"Low Environmental Attitudes"), .rows = structure(list(3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
As an alternative to re-labeling the y-axis, you can cut it off at 1 by setting oob = scales::oob_squish. The out of bounds squish function sets the values that are out of bounds to the nearest limit. This preserves the upper part of the bar, giving the same interpretation, whereas relabeling would suggest the first bar exceeds the value 4, and it doesn't.
# Start the y-axis at 1 by setting scale limits and squishing out-of-bounds
# values to the nearest limit, so the bars (which start at 0) are kept.
groups %>%
  ungroup() %>%
  mutate(
    message = fct_relevel(message, "Personal", "General"),
    enviroattitudeshalf = fct_relevel(
      enviroattitudeshalf,
      "Low Environmental Attitudes",
      "High Environmental Attitudes"
    )
  ) %>%
  ggplot(aes(x = message, y = mean)) +
  # Hex colours need the leading "#"; without it the string is not read as
  # the dark green #003900.
  geom_col(width = 0.5, fill = "#003900") +
  # `vjust` is a fixed offset, not a data mapping, so it is set outside aes().
  geom_text(aes(label = round(mean, digits = 1)), vjust = -2) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Environment: Evaluations of Personal and General Convincingness",
    y = "Rating",
    x = "Personal evaluation or general evaluation"
  ) +
  facet_wrap(~enviroattitudeshalf) +
  scale_y_continuous(limits = c(1, 8), oob = scales::oob_squish)
Using scale_y_continuous you can adjust breaks and labels.
library(tidyverse)
# Keep the bars as drawn and relabel the y-axis ticks instead of changing the
# scale limits.
groups %>%
  ungroup() %>%
  mutate(
    message = fct_relevel(message, "Personal", "General"),
    enviroattitudeshalf = fct_relevel(
      enviroattitudeshalf,
      "Low Environmental Attitudes",
      "High Environmental Attitudes"
    )
  ) %>%
  ggplot(aes(x = message, y = mean)) +
  # Hex colours need the leading "#"; without it the string is not read as
  # the dark green #003900.
  geom_col(width = 0.5, fill = "#003900") +
  # `vjust` is a fixed offset, not a data mapping, so it is set outside aes().
  geom_text(aes(label = round(mean, digits = 1)), vjust = -3) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Environment: Evaluations of Personal and General Convincingness",
    y = "Rating",
    x = "Personal evaluation or general evaluation"
  ) +
  # Ticks at data values 0..8 are displayed as 1..9, i.e. every tick label is
  # one higher than the underlying data value.
  scale_y_continuous(breaks = 0:8, labels = 1:9) +
  facet_wrap(~enviroattitudeshalf, scales = "free_y")

Changing lower limit of graph y-axis when x is a discrete variable

I am trying to adjust the y-axis of this graph so that it starts at 1 instead of 0. What's the best way to do this?
One solution I have used in the past for this type of issue is to use geom_rect instead of geom_bar. However, this does not work here because the x-axis is not continuous.
I have tried to create a separate, continuous x-variable, but it really messes up the formatting of the graph.
I would like for it to look like this graph, but with the lower y-limit at 1 and each bar moved down 1 unit to match.
Code:
# Faceted bar chart of `mean` by `message` (one panel per
# `enviroattitudeshalf` level) with value labels, mean +/- se error bars and
# the y-axis fixed to 0-8. Both factors are releveled before plotting.
# NOTE(review): "003900" has no leading "#", so it is not a hex colour
# specification; confirm whether "#003900" was intended.
groups %>%
ungroup() %>%
mutate(message = fct_relevel(message, "Personal", "General"),
enviroattitudeshalf = fct_relevel(enviroattitudeshalf, "Low Environmental Attitudes", "High Environmental Attitudes")) %>%
ggplot(aes(x = message, y = mean)) +
geom_col(width = 0.5, fill = "003900") +
geom_text(aes(label = round(mean, digits = 1), vjust = -2)) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2, position = position_dodge(.9)) +
labs(title = "Environment: Evaluations of Personal and General Convincingness",
y = "Rating",
x = "Personal evaluation or general evaluation") +
ylim(0, 8) +
facet_wrap(~enviroattitudeshalf)
Data:
structure(list(enviroattitudeshalf = c("Low Environmental Attitudes",
"Low Environmental Attitudes", "High Environmental Attitudes",
"High Environmental Attitudes"), message = c("General", "Personal",
"General", "Personal"), mean = c(3.89473684210526, 3.37894736842105,
4.43636363636364, 5.10909090909091), se = c(0.145460372156746,
0.19522803582675, 0.160549137262631, 0.171509247396541)), row.names = c(NA,
-4L), groups = structure(list(enviroattitudeshalf = c("High Environmental Attitudes",
"Low Environmental Attitudes"), .rows = structure(list(3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Use coord_cartesian(), which zooms the y-axis to the given limits without dropping the data that fall outside them:
library(tidyverse)
# Start the visible y-axis at 1 by zooming with coord_cartesian() rather than
# setting scale limits.
groups %>%
  ungroup() %>%
  mutate(
    message = fct_relevel(message, "Personal", "General"),
    enviroattitudeshalf = fct_relevel(
      enviroattitudeshalf,
      "Low Environmental Attitudes",
      "High Environmental Attitudes"
    )
  ) %>%
  ggplot(aes(x = message, y = mean)) +
  # Hex colours need the leading "#"; without it the string is not read as
  # the dark green #003900.
  geom_col(width = 0.5, fill = "#003900") +
  # `vjust` is a fixed offset, not a data mapping, so it is set outside aes().
  geom_text(aes(label = round(mean, digits = 1)), vjust = -2) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  labs(
    title = "Environment: Evaluations of Personal and General Convincingness",
    y = "Rating",
    x = "Personal evaluation or general evaluation"
  ) +
  coord_cartesian(ylim = c(1, 8)) +
  facet_wrap(~enviroattitudeshalf)

vjust inconsistent in stacked bar plot

I have a stacked bar plot, with highly unequal heights of bars. I would like to show the percentages on top of each bar.
What I have done so far is the following
df = structure(list(Type = c("Bronchoscopy", "Bronchoscopy", "Endoscopy",
"Endoscopy"), Bacteremia = structure(c(1L, 2L, 1L, 2L), .Label = c("False",
"True"), class = "factor"), count = c(2710L, 64L, 13065L, 103L
), perc = c(97.6928622927181, 2.3071377072819, 99.2178007290401,
0.782199270959903)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -4L), groups = structure(list(
Type = c("Bronchoscopy", "Endoscopy"), .rows = list(1:2,
3:4)), row.names = c(NA, -2L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE))
# Stacked bars of `perc` per `Type`, filled by `Bacteremia`, with bold black
# percentage labels placed through position_stack(vjust = -0.1).
ggplot(df, aes(x = Type, y = perc, fill = Bacteremia)) +
geom_bar(stat = "identity") +
ylab("percent") +
geom_text(aes(label = paste0(round(perc, 2), "%")), position =
position_stack(vjust = -0.1), color = "black", fontface = "bold")
I can't seem to get the vjust right. It seems like it's not behaving in the same way for the bottom versus the top bar.
What I would like to achieve is to place the percentages slightly higher than the top edge of each bar.
Any ideas?
Here's a possible approach:
# Stacked bars with a percentage label per segment; each label is anchored at
# the segment's own `perc` value and shifted upwards by `nudge_y`.
ggplot(data = df, mapping = aes(x = Type, y = perc, fill = Bacteremia)) +
  geom_col() +
  labs(y = "percent") +
  geom_text(
    # y is inherited from the top-level mapping.
    mapping = aes(label = paste0("", round(perc, 2), "%\n")),
    nudge_y = 2,
    fontface = "bold",
    color = "black"
  )
I should elaborate that ggplot2 places geom_text() relative to the data. If you want the text labels aligned at the same height, you will need to either use annotate() or supply a labelling dataset with Type, perc and Bacteremia (plus the desired y positions) and pass it to geom_text() as below.
# Labelling data: the plotted rows plus a hand-picked y position per label.
labdf <- cbind(df, ypos = c(103, 5, 103, 5))

# Bars come from `df`; the text layer reads its y position from `labdf`.
ggplot(data = df, mapping = aes(x = Type, y = perc, fill = Bacteremia)) +
  geom_col() +
  labs(y = "percent") +
  geom_text(
    data = labdf,
    # x is inherited from the top-level mapping.
    mapping = aes(y = ypos, label = paste0("", round(perc, 2), "%")),
    fontface = "bold",
    color = "black"
  )
Here's one way to do it:
# Rebuild the example data from the raw counts and compute, within each
# procedure type, the percentage each row contributes (rounded to 1 decimal).
df <-
  tibble(
    Type = c("Bronchoscopy", "Bronchoscopy", "Endoscopy", "Endoscopy"),
    Bacteremia = c("False", "True", "False", "True"),
    count = c(2710L, 64L, 13065L, 103L)
  ) %>%
  group_by(Type) %>%
  mutate(Percent = round(count / sum(count) * 100, 1)) %>%
  # Drop the grouping once the per-Type percentages are computed so later
  # verbs do not silently operate per group.
  ungroup()
# Stacked bars with two label layers: "True" rows are labelled 5 units above
# their own percentage, "False" rows at a constant y of 105.
ggplot(df, aes(x = Type, y = Percent, fill = Bacteremia)) +
  geom_col() +
  geom_label(
    # A function passed as `data` is applied to the plot's data.
    data = function(d) filter(d, Bacteremia == "True"),
    mapping = aes(y = Percent + 5, label = str_c(Percent, "%")),
    show.legend = FALSE
  ) +
  geom_label(
    data = function(d) filter(d, Bacteremia == "False"),
    mapping = aes(y = 105, label = str_c(Percent, "%")),
    show.legend = FALSE
  )
The choices of 5 and 105 work on my computer, but may need to be tweaked a bit based on your specific settings and aspect ratio. The first geom_label call sets the y-axis based on the precise percentage, while the second one sets it at a constant level above the bars.
You might also want to play around with using geom_text vs. geom_label to experiment with different color and label settings. The nice thing about geom_label is that it will make it very clear which group is being labeled.

Resources