I want to create a pie chart with labels displayed at certain locations signifying whether a proportion test is significant or not. I have managed to create a basic pie chart and have pasted the code below. I have also posted the desired result (created using MS Paint) and would love any help I can get in creating this plot.
library(tidyverse)
# Example data: three conditions with two categories each.
# `condition` has length 3 and is recycled against the six `freq` values.
df <- data.frame(
  condition = c('x', 'y', 'z'),
  cat = rep(c('a', 'b'), times = 3),
  freq = c(60, 34, 44, 40, 66, 56)
)

# Share (in percent) of each category within its condition, stored in `label`.
# The result stays grouped by `condition`.
df <- mutate(
  group_by(df, condition),
  label = freq / sum(freq) * 100
)
# Pie chart per condition: a filled (proportional) bar bent into polar
# coordinates, with the percentage printed in the middle of each slice.
ggplot(df, aes(x = '', y = freq, fill = cat)) +
  facet_grid(". ~ condition") +
  geom_col(position = 'fill') +
  geom_label(aes(label = label), position = position_fill(vjust = 0.5)) +
  coord_polar(theta = 'y') +
  scale_y_continuous(breaks = NULL) +
  theme_grey() +
  theme(
    # frame and titles
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    plot.title = element_text(
      color = "black", size = 16, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(color = "black", size = 14, hjust = 0.5),
    # remove grid and axis decorations, which are meaningless on a pie
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    # facet strip labels
    strip.text = element_text(size = 14, face = "bold"),
    strip.text.x = element_text(size = 14, face = "bold"),
    strip.text.y = element_text(size = 14, face = "bold"),
    # legend: horizontal, below the panels
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.title = element_text(size = 14, face = "bold"),
    legend.title.align = 0.5,
    legend.text = element_text(size = 14),
    legend.text.align = 0.5,
    legend.key = element_rect(size = 5),
    legend.key.size = unit(1.5, "lines"),
    legend.margin = margin(5, 5, 5, 5),
    legend.box.margin = margin(5, 5, 5, 5)
  ) +
  # draw the legend keys without an outline colour
  guides(fill = guide_legend(override.aes = list(colour = NA)))
Created on 2018-03-21 by the reprex package (v0.2.0).
Here is what the desired result looks like (notice that the text is always at the top and aligned to the center of the pie chart)-
How about this? I used geom_text and specified x = 1.6 to get labels outside the radius of the pie chart. Also, just a style tip, but you have a lot of unnecessary arguments in theme(), and you don't need to put ggplot2:: before ggplot functions after loading the tidyverse. Also, if you'd like the pie chart to not have a small empty dot in the center of it, you can put width = 1 in your call to geom_col.
library(tidyverse)
# Example data: frequencies per condition/category plus a significance
# marker (`sig`). The marker is set on the 'a' rows only; NA means "no
# marker for this slice".
df <-
  data.frame(
    condition = c('x', 'y', 'z'),
    cat = rep(c('a', 'b'), 3),
    freq = c(60, 34, 44, 40, 66, 56),
    sig = c("***", NA, "ns", NA, "**", NA)
  ) %>%
  group_by(condition) %>%
  mutate(label = freq / sum(freq) * 100) %>%
  # drop the grouping so later steps do not silently operate per condition
  ungroup()

# One pie per condition; percentages inside the slices and the
# significance marker outside the pie radius.
ggplot(data = df, mapping = aes('', freq, fill = cat)) +
  facet_wrap(~ condition, nrow = 1) +
  # width = 1 removes the small empty dot at the centre of the pie
  geom_col(position = 'fill', width = 1) +
  geom_label(aes(label = label), position = position_fill(vjust = 0.5)) +
  # x = 1.6 pushes the marker beyond the pie radius; na.rm = TRUE drops
  # the rows without a marker silently instead of warning on every render
  geom_text(
    aes(label = sig, x = 1.6),
    position = position_fill(vjust = 1),
    na.rm = TRUE
  ) +
  coord_polar(theta = 'y') +
  theme_grey() +
  theme(
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    strip.text = element_text(size = 14, face = "bold"),
    strip.background = element_rect(color = "black", size = 1),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14, face = "bold"),
    legend.position = "bottom",
    legend.key.size = unit(1.5, "lines"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1)
  ) +
  # draw the legend keys without an outline colour
  guides(fill = guide_legend(override.aes = list(colour = NA)))
Here's the result
Related
I have adapted the solution to align forest plot and a table from this post:
how to align table with forest plot (ggplot2)
Here is my code:
library(dplyr, warn = FALSE)
library(ggplot2)
library(patchwork)
# Odds ratios with lower/upper confidence limits, one row per group.
tester <- data.frame(
  treatmentgroup = c("Education Continuous", "0", "1-4", "5-8", ">8"),
  or = c(0.914, 0.961, 0.709, 0.523, 0.457),
  low_ci = c(0.894, 0.793, 0.577, 0.389, 0.339),
  up_ci = c(0.935, 1.166, 0.871, 0.708, 0.616)
)

# Display strings are built from the numeric columns (three decimals),
# e.g. "0.914 (0.894; 0.935)" and "0.894; 0.935".
tester[["OR_ci"]] <- sprintf(
  "%.3f (%.3f; %.3f)",
  tester[["or"]], tester[["low_ci"]], tester[["up_ci"]]
)
tester[["ci"]] <- sprintf(
  "%.3f; %.3f",
  tester[["low_ci"]], tester[["up_ci"]]
)

# Row number of each group.
tester[["no"]] <- c(1, 2, 3, 4, 5)
# `col_grid` (colour of the horizontal grid lines) is used by the theme
# below but is not defined in this snippet. Fall back to a light,
# semi-transparent grey when the caller has not defined it already.
if (!exists("col_grid")) {
  col_grid <- rgb(235, 235, 235, 100, maxColorValue = 255)
}

# Forest plot: point estimate with CI range per group, reference line at
# OR = 1, drawn horizontally via coord_flip().
forest <- ggplot(
  data = tester,
  aes(x = treatmentgroup, y = or, ymin = low_ci, ymax = up_ci)
) +
  geom_pointrange(aes(col = treatmentgroup)) +
  geom_hline(yintercept = 1, colour = "black") +
  xlab("") +
  ylab("OR (95% CI)") +
  # width = 0 draws the interval without end caps
  geom_errorbar(
    aes(ymin = low_ci, ymax = up_ci, col = treatmentgroup),
    width = 0, cex = 1
  ) +
  theme_classic() +
  theme(
    panel.background = element_blank(),
    strip.background = element_rect(colour = NA, fill = NA),
    strip.text.y = element_text(face = "bold", size = 12),
    panel.grid.major.y = element_line(colour = col_grid, size = 0.5),
    strip.text = element_text(face = "bold"),
    panel.border = element_rect(fill = NA, color = "black"),
    legend.position = "none",
    axis.text = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 13)
  ) +
  coord_flip()
# Long-format data for the text table: one row per group holding the
# formatted "OR (CI)" string in `value`, keyed by `stat`.
dat_table <- tidyr::pivot_longer(
  select(tester, treatmentgroup, OR_ci),
  cols = OR_ci,
  names_to = "stat"
)
dat_table <- mutate(dat_table, stat = factor(stat, levels = "OR_ci"))

# Text-only "plot" that acts as the table column next to the forest plot.
table_base <- ggplot(dat_table, aes(x = stat, y = treatmentgroup, label = value)) +
  geom_text(size = 3) +
  # column header shown above the text
  scale_x_discrete(position = "top", labels = "OR (95% CI)") +
  labs(x = NULL, y = NULL) +
  theme_classic() +
  theme(
    # hide everything except the column header
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 12),
    axis.title = element_text(face = "bold"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    strip.background = element_blank()
  )

# Place the forest plot and the table side by side (patchwork), 10:4 wide.
forest + table_base + plot_layout(widths = c(10, 4))
However, my graph ends up with the categories out of order. How can I adjust the order to this one: Education Continuous, 0, 1-4, 5-8, and >8?
I tried factor(tester$treatmentgroup) but it did not work.
Also, how can I make all the categories the same color (black, for example) instead of a different color for each one? I tried removing the line geom_pointrange(aes(col = treatmentgroup)) + but it does not work.
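You're right that you can convert treatmentgroup to a factor; you just need to specify the levels explicitly. ggplot2 places the first factor level at the bottom of a vertical discrete axis (which is what coord_flip() gives you here), so list the levels in the reverse of the top-to-bottom order you want. Try running this code before you generate your plots with ggplot().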
# Fix the display order of the groups. The levels are the desired
# top-to-bottom order reversed, because the first level is drawn at the
# bottom of the vertical discrete axis.
tester$treatmentgroup <- factor(
  tester$treatmentgroup,
  levels = rev(c("Education Continuous", "0", "1-4", "5-8", ">8"))
)
P-values can be added to ggplot2 figures using the function ggpubr::stat_compare_means(). However, I cannot get the text "p = " to show up in front of the p-values. There are examples of how to add "p = " in front of p-values on the help page for the function, but they do not seem to work.
Example
library(ggplot2)
library(ggpubr)
library(dplyr)
# Cars93 ships with the MASS package, which is not attached here, so the
# package has to be named for data() to find the data set.
data("Cars93", package = "MASS")

# Pairwise comparisons for which p-values will be derived
my_comparisons <- list(
  c("Front", "Rear"),
  c("Front", "4WD"),
  c("Rear", "4WD")
)

# Box plot of Price by DriveTrain with jittered points and pairwise
# p-values; the p-values carry no "p = " prefix in this version.
Cars93 %>%
  mutate(DriveTrain = factor(DriveTrain, levels = c("Front", "Rear", "4WD"))) %>%
  ggplot(aes(x = DriveTrain, y = Price)) +
  stat_compare_means(
    paired = FALSE,
    comparisons = my_comparisons
  ) +
  # outlier.* settings hide the box plot's own outlier glyphs; the jitter
  # layer already draws every observation
  geom_boxplot(
    outlier.colour = "white", outlier.fill = "white",
    outlier.shape = 1, outlier.size = 0
  ) +
  geom_jitter(
    shape = 1, position = position_jitter(0.2),
    color = "black", fill = "white", size = 2
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    panel.grid = element_blank(),
    panel.background = element_blank()
  )
Following the example at the bottom of the ?stat_compare_means page suggests using aes(label = paste0("p = ", ..p.format..)), which does not work.
?stat_compare_means
# Attempt 1: build the label from the computed `..p.format..` variable.
# Reported by the asker as not adding the "p = " prefix.
ggplot(
  mutate(Cars93, DriveTrain = factor(DriveTrain, levels = c("Front", "Rear", "4WD"))),
  aes(x = DriveTrain, y = Price)
) +
  stat_compare_means(
    aes(label = paste0("p = ", ..p.format..)),
    comparisons = my_comparisons,
    paired = FALSE
  ) +
  geom_boxplot(
    outlier.colour = "white",
    outlier.fill = "white",
    outlier.shape = 1,
    outlier.size = 0
  ) +
  geom_jitter(
    shape = 1,
    position = position_jitter(0.2),
    color = "black",
    fill = "white",
    size = 2
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black")
  )
If you look at the label argument on the ?stat_compare_means help page it says the allowed values include "p.signif" or "p.format" which made me think ..p.format.. was deprecated, so I tried adding in "p.format" which also did not work.
# Attempt 2: pass the literal string "p.format" instead of the computed
# variable. Also reported by the asker as not working.
ggplot(
  mutate(Cars93, DriveTrain = factor(DriveTrain, levels = c("Front", "Rear", "4WD"))),
  aes(x = DriveTrain, y = Price)
) +
  stat_compare_means(
    aes(label = paste0("p = ", "p.format")),
    comparisons = my_comparisons,
    paired = FALSE
  ) +
  geom_boxplot(
    outlier.colour = "white",
    outlier.fill = "white",
    outlier.shape = 1,
    outlier.size = 0
  ) +
  geom_jitter(
    shape = 1,
    position = position_jitter(0.2),
    color = "black",
    fill = "white",
    size = 2
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black")
  )
In the end I would like the p-values to be preceded by p = such that the labels would say p = 0.00031, p = 0.059, and p = 0.027.
When you use a list of comparisons, stat_compare_means defaults to using geom_signif from the ggsignif package, essentially acting as a glorified wrapper function. In so doing, you lose some of the formatting flexibility. Better in this case to use geom_signif directly:
library(ggsignif)
# Same plot, but calling geom_signif() directly so the bracket labels can
# be formatted freely: each p-value is rendered as "p = <value>".
ggplot(
  mutate(Cars93, DriveTrain = factor(DriveTrain, levels = c("Front", "Rear", "4WD"))),
  aes(x = DriveTrain, y = Price)
) +
  geom_signif(
    comparisons = my_comparisons,
    # heights of the three comparison brackets on the Price axis
    y_position = c(55, 60, 65),
    map_signif_level = function(p) paste("p =", scales::pvalue(p))
  ) +
  geom_boxplot(
    outlier.colour = "white",
    outlier.fill = "white",
    outlier.shape = 1,
    outlier.size = 0
  ) +
  geom_jitter(
    shape = 1,
    position = position_jitter(0.2),
    color = "black",
    fill = "white",
    size = 2
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black")
  )
How can I shift the legend by several points (the width of the graph box line) to the left and bottom?
Task: I want to make the background of the legend semi-transparent, but so it doesn't overlap the graph box.
(red border - for better visualization of the problem)
Use the code:
# Theme applied to the existing plot `image`: white panel with a thick
# black border and a semi-transparent legend pinned to the top-right
# corner. The red legend outline is only there to visualise the overlap.
image +
  theme(
    # panel
    panel.background = element_rect(fill = "white", color = NA),
    panel.border = element_rect(fill = NA, color = "black", size = 2),
    panel.grid.major = element_line(color = "#00000040", linetype = 3),
    # axes
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    # legend, anchored by its top-right corner at the panel's top-right
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.background = element_rect(fill = "#ffffff80", color = "red", size = 1),
    legend.key = element_rect(fill = NA, color = NA),
    legend.title = element_text(size = 10, color = "black"),
    # title
    plot.title = element_text(hjust = 0.5)
  )
If you want the legend box to align with the outside edge of the plot border, you need to adjust the legend.box.margin so that the top edge has the same value as the width of the line defined in legend.background.
There was no sample data, so I created some:
library(ggplot2)
# Sample data: -cos(x) and sin(x) over one period, stacked in long format.
x <- seq(0, 2 * pi, length.out = 100)
df <- data.frame(
  x = rep(x, times = 2),
  y = c(-cos(x), sin(x)),
  group = rep(c("data1", "data2"), each = length(x))
)

# Base line plot, one coloured line per group.
image <- ggplot(df, aes(x, y)) +
  geom_line(aes(colour = group)) +
  scale_colour_manual(values = c("red", "black"))

# Same theme as in the question, plus a top margin on the legend box to
# shift the legend relative to the panel border.
image +
  theme(
    # panel
    panel.background = element_rect(fill = "white", color = NA),
    panel.border = element_rect(fill = NA, color = "black", size = 2),
    panel.grid.major = element_line(color = "#00000040", linetype = 3),
    # axes
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    # legend, anchored by its top-right corner at the panel's top-right
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.box.margin = margin(1, 0, 0, 0),
    legend.background = element_rect(fill = "#ffffff80", color = "red", size = 1),
    legend.key = element_rect(fill = NA, color = NA),
    legend.title = element_text(size = 10, color = "black"),
    # title
    plot.title = element_text(hjust = 0.5)
  )
Created on 2020-05-25 by the reprex package (v0.3.0)
found it!
# Fragment of a theme() call (not runnable on its own): gives the legend
# box a top and right margin of 2.
legend.box.margin = margin(t = 2, r = 2),
Just to be clear: I am relatively new to R, and the code I am using is borrowed from someone else.
I have this graph for polling averages:
Here is my code: https://pastebin.com/qvQERRUH
library("tidyverse")
# Load the raw polls and build a `date` column (text, as produced by
# format() on a Date) from the year/month/day columns.
polls <- read.csv("polls_Paris.csv")
polls <- polls %>%
  mutate(date = format(as.Date(paste(year, month, day, sep = "-"))))

# For every list, add a `ci_<list>` column computed from the `liste_<list>`
# share and `n` as 1.96 * sqrt(share * (100 - share) / n).
# NOTE(review): assumes `n` is a column of `polls` - confirm against the CSV.
for (party in c("LFI", "PS", "EELV", "PP", "Griveaux", "LREM", "Villani",
                "Agir", "LR", "RN", "LP")) {
  share <- polls[[paste0("liste_", party)]]
  polls[[paste0("ci_", party)]] <-
    1.96 * sqrt(share * (100 - share) / polls[["n"]])
}

# Polls dated after "today minus 10 months".
polls.10m <- polls[
  as.Date(polls$date) > seq(Sys.Date(), length.out = 2, by = "-10 months")[2],
]

# Polls in chronological order, limited to the 5000 most recent ones.
# The ranking uses the piped data's own `date` column; referring to
# `polls$date` here would rank against the unsorted outer data frame,
# whose row order no longer matches the sorted rows.
polls.100 <- polls[order(as.Date(polls$date)), ] %>%
  top_n(5000, as.Date(date))
#Results = data.frame(date = as.Date("2019-12-01"), support = c(69.1,30.9))

# Scatter of individual polls plus a loess trend per list, built as an
# object so it can be printed explicitly into the SVG device below.
poll_plot <- polls.100 %>%
  gather(
    party, support,
    c(liste_LFI, liste_PS, liste_EELV, liste_PP, liste_Griveaux, liste_LREM,
      liste_Villani, liste_Agir, liste_LR, liste_RN, liste_LP),
    factor_key = TRUE
  ) %>%
  ggplot(aes(x = as.Date(date), y = support, colour = party)) +
  geom_point(size = 2.5, alpha = 0.275) +
  geom_smooth(se = FALSE, method = "loess", span = 1) +
  labs(y = NULL, x = NULL) +
  # legend keys: points only (no line), fully opaque
  guides(colour = guide_legend(
    ncol = 1,
    override.aes = list(linetype = 0, size = 3, alpha = 1)
  )) +
  # 11 labels for the 11 gathered lists; 12 colours are supplied, so the
  # last colour is surplus
  scale_colour_manual(
    labels = c(
      "Simonnet (LFI)", "Hidalgo (PS-PCF-G·s)", "Belliard (EELV)",
      "Gantzer (DVG)", "Griveaux (LREM-MR-UDI)",
      "Griveaux (avant diss. de Villani)", "Villani (Diss. LREM-PRG)",
      "Bournazel (Agir)", "Dati (LR)", "Federbusch (DVD-RN)", "Campion (SE)"
    ),
    values = c(
      "#cc2443", "#FF8080", "#00c000", "#ffc0c0", "#ffeb00", "#ffeb00",
      "#FF7F50", "#adc1fd", "#0066CC", "#0D378A", "#808080", "#808080"
    )
  ) +
  theme(
    plot.margin = margin(t = 0, unit = "cm"),
    plot.background = element_blank(),
    panel.background = element_rect(fill = "grey92", colour = NA),
    panel.border = element_blank(),
    legend.background = element_rect(fill = "transparent", colour = NA),
    legend.key = element_rect(fill = "transparent", colour = NA),
    legend.title = element_blank(),
    strip.background = element_rect(fill = "transparent", colour = NA),
    panel.grid.major = element_line(colour = "#FFFFFF"),
    panel.grid.minor = element_line(colour = "#FFFFFF", size = 0.25),
    axis.ticks = element_line(colour = "grey20"),
    axis.line = element_blank(),
    plot.title = element_text(size = 12, hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 12, colour = "#212121"),
    axis.title = element_text(size = 12, face = "plain"),
    axis.text = element_text(size = 12, face = "plain", colour = "grey30"),
    legend.position = "right",
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 12, face = "plain"),
    legend.margin = margin(t = 0, unit = "cm")
  ) +
  scale_y_continuous(
    breaks = seq(0, 33, 5), minor_breaks = seq(0, 33, 1),
    limits = c(0, 33), expand = c(0, 0)
  ) +
  scale_x_date(breaks = "6 months", minor_breaks = "1 month", expand = c(0, 0))
  #geom_point(data = Results, colour = c("#808080", "#E81B23"), size=4, shape=5) +
  #geom_point(data = Results, colour = c("#808080", "#E81B23"), size=3.5, shape=18)

# Render into the SVG file. print() is explicit because a ggplot object is
# only drawn automatically at the interactive top level; without it the
# file would be empty when this script is run through source().
svg('Opinion polling for the 2020 Paris municipal election.svg', width = 12, height = 6)
print(poll_plot)
dev.off()
As you can see, Griveaux's line is split to separate the before-and-after of Villani's dissident candidacy; it is actually two separate lines (also separate in the dataset). Griveaux's name therefore has to appear twice.
How do I remove the legend key of a single series (that is, remove the key for both the dots and the regression line)?
Here is a hack. To remove a legend key, remove it from the breaks argument to scale_*_manual or equivalent but you must keep the same number of values as there are unique values in the color/fill aesthetic.
This is better shown with an example. I will use built-in data set iris.
To remove the legend key relative to "versicolor",
# Levels of the colour variable. `iris` is queried directly because `df1`
# (a column subset of iris) is only created further down.
levels(iris$Species)
#[1] "setosa" "versicolor" "virginica"
just don't include it in the breaks.
library(ggplot2)
# Petal measurements plus Species (columns 3 to 5 of iris).
df1 <- iris[3:5]

# Scatter plot with a loess trend per species. "versicolor" is left out of
# `breaks`, so it is drawn but gets no legend key.
ggplot(df1, aes(Petal.Length, Petal.Width, color = Species)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "loess", span = 1) +
  scale_color_manual(
    breaks = c("setosa", "virginica"),
    # Name the colours by level: when `breaks` is given, unnamed values are
    # matched to the breaks in order (ggplot2 >= 3.3.0), which would leave
    # "versicolor" without a colour.
    values = c(setosa = "red", versicolor = "green", virginica = "blue")
  )
I am trying to move the legend text and legend boxes further apart (horizontally) on a box and jitter plot. The complicating factor is the coord_flip I used to make the boxplot horizontal. In theme I tried using both legend.spacing.x and legend.spacing.y but neither had any effect on the distance between legend text and legend boxes.
Here is the graph with fake data. More complex than necessary I know but I need to be able to make it work with all the complications.
library(dplyr)
library(ggplot2)
set.seed(1234)

# Simulated data: 80 subjects, 40 per group. Scores are drawn from a
# normal distribution (mean 55 for Placebo, 38 for Drug, sd 30), rounded,
# made non-negative and capped at 84.
totDays <- data.frame(
  id = seq_len(80),
  group = rep(c("Placebo", "Drug"), each = 40),
  total84 = unlist(lapply(
    c(55, 38),
    function(mu) pmin(abs(round(rnorm(40, mean = mu, sd = 30))), 84)
  ))
)
# Per-group descriptives: mean, standard deviation, count and standard
# error of the score. The outer parentheses print the result.
(groupDF <- dplyr::summarise(
  group_by(totDays, group),
  m = mean(total84, na.rm = TRUE),
  sd = sd(total84, na.rm = TRUE),
  count = n(),
  se = sd / sqrt(count)
))
# Horizontal box-and-jitter plot of the score by group, with group means,
# an arrow spanning the two means and a p-value annotation. The outer
# parentheses print the plot after assigning it.
(g <- ggplot(totDays, aes(x = group, y = total84, colour = group)) +
  # jitter so points are not overlaid; width controls the amount of jitter
  geom_jitter(size = 1, width = 0.1) +
  # black crosses mark the group means
  geom_point(
    stat = "summary", fun.y = "mean",
    shape = 3, size = 3, colour = "black"
  ) +
  geom_boxplot(alpha = 0, width = 0.5, lwd = 1, size = 0.5) +
  scale_color_manual(values = c("#00AFBB", "#E7B800")) +
  # minor breaks coincide with the major ones
  scale_y_continuous(
    breaks = seq(0, 84, 14),
    minor_breaks = seq(0, 84, 14)
  ) +
  coord_flip() +
  labs(y = "Score") +
  # dotted reference line at each group mean
  geom_hline(yintercept = groupDF$m, linetype = "dotted") +
  # double-headed arrow between the two means, above the upper box
  geom_segment(
    x = 2.38, xend = 2.38,
    y = groupDF$m[2] + .1, yend = groupDF$m[1] - .1,
    size = .7, colour = "#696969",
    arrow = arrow(end = "both", type = "open", length = unit(0.15, "cm"))
  ) +
  annotate(
    "text",
    x = 2.46, y = mean(groupDF$m),
    label = paste0("italic(p) == ", 0.02), parse = TRUE
  ) +
  theme_bw() +
  theme(
    # the group axis is replaced by the legend
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 13),
    # margin moves the title away from the axis text
    axis.title.x = element_text(
      size = 13, face = "bold",
      margin = margin(t = 0, r = 0, b = 10, l = 0), vjust = -2
    ),
    legend.title = element_blank(),
    legend.position = "top",
    legend.spacing.y = unit(.1, "cm"),
    # distance between the legend box and the plot
    legend.box.spacing = unit(.1, "cm"),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 13, face = "bold"),
    strip.text = element_text(size = 13, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(size = .4, color = "#F7F7F7")
  ))
Use either stringr::str_pad() or theme(legend.spacing.x = ...) or both
# Same plot as in the question, with two additions that widen the gap
# between legend keys and legend text: right-padded legend labels and a
# horizontal legend spacing.
g <- ggplot(totDays, aes(x = group, y = total84, colour = group)) +
  # jitter so points are not overlaid; width controls the amount of jitter
  geom_jitter(size = 1, width = 0.1) +
  # black crosses mark the group means
  geom_point(
    stat = "summary", fun.y = "mean",
    shape = 3, size = 3, colour = "black"
  ) +
  geom_boxplot(alpha = 0, width = 0.5, lwd = 1, size = 0.5) +
  scale_color_manual(
    values = c("#00AFBB", "#E7B800"),
    ### added: labels padded on the right to a width of 10 characters
    labels = stringr::str_pad(c("Drug", "Placebo"), 10, "right")
  ) +
  # minor breaks coincide with the major ones
  scale_y_continuous(
    breaks = seq(0, 84, 14),
    minor_breaks = seq(0, 84, 14)
  ) +
  coord_flip() +
  labs(y = "Score") +
  # dotted reference line at each group mean
  geom_hline(yintercept = groupDF$m, linetype = "dotted") +
  # double-headed arrow between the two means, above the upper box
  geom_segment(
    x = 2.38, xend = 2.38,
    y = groupDF$m[2] + .1, yend = groupDF$m[1] - .1,
    size = .7, colour = "#696969",
    arrow = arrow(end = "both", type = "open", length = unit(0.15, "cm"))
  ) +
  annotate(
    "text",
    x = 2.46, y = mean(groupDF$m),
    label = paste0("italic(p) == ", 0.02), parse = TRUE
  ) +
  theme_bw() +
  theme(
    # the group axis is replaced by the legend
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 13),
    axis.title.x = element_text(
      size = 13, face = "bold",
      margin = margin(t = 0, r = 0, b = 10, l = 0), vjust = -2
    ),
    legend.title = element_blank(),
    legend.position = "top",
    ### added: horizontal spacing inside the legend
    legend.spacing.x = unit(0.25, 'cm'),
    legend.spacing.y = unit(.1, "cm"),
    # distance between the legend box and the plot
    legend.box.spacing = unit(.1, "cm"),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 13, face = "bold"),
    strip.text = element_text(size = 13, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(size = .4, color = "#F7F7F7")
  )
Created on 2019-03-11 by the reprex package (v0.2.1.9000)