Related
I have my labels roughly aligned to each side of my stacked bar chart. The problem is that they look like a mess because they aren't right and left justified on either side of the bar. How do I fix this so that they look professional?
df3 <- data.frame(
Label = c("Dasher", "Dancer", "Comet", "Cupid", "Prancer", "Blitzen", "Rudolph"),
Amount = c(650.01, 601.01, 340.05, 330.20, 260.01, 250.80, 10.10)
)
# Sort order
level_order <- df3 %>%
arrange(desc(Amount))
ggplot(level_order, aes(fill=fct_inorder(Label), y=Amount, x="")) +
geom_bar(position="stack", stat="identity", width = 0.55) +
scale_fill_brewer(palette = "Blues", direction = -1) +
theme_void() +
geom_text(aes(label = paste0("$", Amount)),
position = position_stack(vjust = 0.5),
hjust = -3.1,
size = 5) +
geom_text(aes(label = Label),
position = position_stack(vjust = 0.5),
hjust = 5,
size = 5) +
theme(legend.position = "none") +
theme(plot.title = element_text(size = 50, hjust = .5, vjust = 0)) +
ggtitle("Food Costs by Reindeer")
hjust determines the text alignment (with 0 being left-aligned, and 1 right-aligned). The x co-ordinate of your geom_text at the moment is defaulted to 1, so changing this will change the position of the text.
ggplot(level_order, aes(fill=fct_inorder(Label), y=Amount, x="")) +
geom_bar(position="stack", stat="identity", width = 0.55) +
scale_fill_brewer(palette = "Blues", direction = -1) +
theme_void() +
geom_text(aes(x=0.6, label = paste0("$", Amount)),
position = position_stack(vjust = 0.5),
hjust = 0.5,
size = 5) +
geom_text(aes(x=1.4, label = Label),
position = position_stack(vjust = 0.5),
hjust = 0.5,
size = 5) +
theme(legend.position = "none") +
theme(plot.title = element_text(size = 50, hjust = .5, vjust = 0)) +
ggtitle("Food Costs by Reindeer")
You can also pass hjust as an aesthetic. In order to do that, you will need to prepare the labelling as a separate data frame. Then, you only need to call geom_text once. I don't say this is necessarily better, but just pointing out that this is possible. A few more comments in the code, also regarding a few common pitfalls.
library(tidyverse)
df3 <- data.frame(
Label = c("Dasher", "Dancer", "Comet", "Cupid", "Prancer", "Blitzen", "Rudolph"),
Amount = c(650.01, 601.01, 340.05, 330.20, 260.01, 250.80, 10.10)
) %>%
## arrange step here
arrange(desc(Amount))
## I like to prepare the data outside ggplot
label_df <- df3 %>%
mutate(Amount_lab = paste0("$", Amount)) %>%
pivot_longer(-Amount) %>%
## this adds a column for your adjustment, and the x position compared with the central column
mutate(hjust = rep(0:1, nrow(.)/2),
x = rep(c(1.21, .79), nrow(.)/2))
ggplot(mapping = aes(y = Amount)) +
## geom_col is geom_bar(stat = "identity"), stack is default, so you can omit it
## call data in the geom layers
## set x to 1
## width = .4 so it matches your selected x from above
geom_col(data = df3, aes(x = 1, fill=fct_inorder(Label)), width = .4) +
scale_fill_brewer(palette = "Blues", direction = -1) +
## need to reverse both y and value, weirdly
geom_text(data = label_df, aes(x, y = rev(Amount), label = rev(value),
## this is the main trick
hjust = hjust),
position = position_stack(vjust = 0.5) ) +
## sadly, need to turn clip off
coord_cartesian(clip = "off") +
theme_void() +
## call theme only once!!
theme(legend.position = "none",
plot.title = element_text(size = 20, hjust = .5, vjust = 0),
## you need to add a margin
plot.margin = margin(r = .6, l = .6, unit = "in")) +
ggtitle("Food Costs by Reindeer")
Created on 2021-12-20 by the reprex package (v2.0.1)
Try fixing the x co-ordinate in the call to geom_text and managing alignment with hjust...
df3 <- data.frame(
Label = c("Dasher", "Dancer", "Comet", "Cupid", "Prancer", "Blitzen", "Rudolph"),
Amount = c(650.01, 601.01, 340.05, 330.20, 260.01, 250.80, 10.10)
)
library(ggplot2)
library(dplyr)
library(forcats)
level_order <- df3 %>%
arrange(desc(Amount))
ggplot(level_order, aes(fill=fct_inorder(Label), y=Amount, x="")) +
geom_bar(position="stack", stat="identity", width = 0.55) +
scale_fill_brewer(palette = "Blues", direction = -1) +
theme_void() +
geom_text(aes(x = 1.3, label = paste0("$", Amount)),
position = position_stack(vjust = 0.5),
hjust = 0,
size = 5) +
geom_text(aes(x = 0.6, label = Label),
position = position_stack(vjust = 0.5),
hjust = 0,
size = 5) +
theme(legend.position = "none") +
theme(plot.title = element_text(size = 50, hjust = .5, vjust = 0)) +
ggtitle("Food Costs by Reindeer")
Created on 2021-12-19 by the reprex package (v2.0.1)
Using Peter's answer above (reminding me of the "x" position argument I forgot existed), this was the final fix that got what I was looking for. hjust = 0 is left-justification and hjust = 1 is right justification.
library(tidyverse)
library(grid)
df3 <- data.frame(
Label = c("Dasher", "Dancer", "Comet", "Cupid", "Prancer", "Blitzen", "Rudolph"),
Amount = c(650.01, 601.01, 340.05, 330.20, 260.01, 250.80, 10.10)
)
# Sort order
level_order <- df3 %>%
arrange(desc(Amount))
ggplot(level_order, aes(fill=fct_inorder(Label), y=Amount, x="")) +
geom_bar(position="stack", stat="identity", width = 0.55) +
scale_fill_brewer(palette = "Blues", direction = -1) +
theme_void() +
geom_text(aes(x = 1.3, label = paste0("$", Amount)),
position = position_stack(vjust = 0.5),
hjust = 0,
size = 5) +
geom_text(aes(x = 0.7, label = Label),
position = position_stack(vjust = 0.5),
hjust = 1,
size = 5) +
theme(legend.position = "none",
plot.margin = unit(c(0,0,2,0), "cm"))
grid.text("Food Costs by Reindeer", x = unit(0.5, "npc"), y = unit(0, "npc"),
vjust = -0.5, gp = gpar(cex=4))
I want to generate facetted boxplots and overlay them with geom_point for multiple lab parameters. The plots should be split along age_group, in the age groups the status of patients should be compared next to each other.
Here's what my mapping looks like:
aes(x=Age_group2, y=value, fill=Status)
This is the overall code for generating my report:
pdf("lab10.pdf")
for (i in 1:number_of_facets_lab){
print(ggplot(lab_merge_clean, aes(x=Age_group2, y=value, fill=Status)) +
geom_boxplot(outlier.shape = NA, position = position_dodge2(width = 0.7,preserve="total"),
width = 0.6) +
geom_point(data = lab_merge_clean %>% filter(max_WHO_classification >= 3 &
analysis.identifier != "COV006"),
position = position_dodge(width = 0.6, preserve="total"),
shape = 16,alpha = 0.7, size = 2.5)+
geom_point(data = lab_merge_clean %>% filter(max_WHO_classification < 3),
position = position_dodge(width = 0.6, preserve="total"),
shape = 1, alpha = 0.7, size = 2.5) +
geom_point(data = lab_merge_clean %>% filter(analysis.identifier=="COV006" ),
position = position_dodge(width = 0.6, preserve = "total"),
shape = 1, alpha = 0.7, size = 2.5, colour = "red", fill = "red") +
scale_fill_manual(values = c("lightskyblue2", "orange2"))+
facet_wrap_paginate(~ lab_parameter, scales = "free_y", ncol = 1, nrow = 1, page = i) +
theme_bw() +
stat_compare_means(aes(label = paste0("p =", ..p.format..))) +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.2))) +theme_classic() +
theme( axis.text = element_text( size = 14 ),
axis.text.x = element_text( size = 20 ),
axis.title = element_text( size = 16, face = "bold" ),
legend.position="none",
strip.text = element_text(size = 20))
)
print(i)
}
dev.off()
In the best case the result looks like this:
But sometimes this happens:
The boxplot is shifted, because there are no observations in the other group. This is why I want to manually fix the number of groups in the fill variable, so that the plots will not get shifted and align with the geom_point dots.
I have looked up solutions on stack, but I haven't managed to find an answer yet. Any help will be greatly appreciated!
Are you looking for position_dodge2 with preserve = "single" but a preserve = "total" on the geom_point?
library(ggplot2)
set.seed(1)
df <- data.frame(y = rnorm(30),
x = c(rep("A", 20), rep("B", 10)),
z = rep(c("1", "2", "1"), each = 10))
ggplot(df, aes(x, y, fill = z)) +
geom_boxplot(position = position_dodge2(width = 1, preserve = "single"),
width = 0.8) +
geom_point(position = position_dodge(width = 0.8, preserve = "total")) +
scale_fill_manual(values = c("lightskyblue2", "orange2")) +
theme_classic() +
theme(legend.position = "none")
What is the function for generating data for plotting an exponential curve between two points? Here's a logarithmically spaced sequence. I want to create more of a hockey stick between the start and end point, and the real end goal is the vector of values not the plot.
My use case is that I have a parameter for a plotting function that needs to ramp up slowly between the given values as I try to plot more data. This log sequence is better than a linear sequence, but it still rises too rapidly. I need to keep the values lower and then increase exponentially.
library(emdbook)
plot(lseq(.08, .25, 10000))
Update
Here is the full challenge for context. I'm plotting every 400th index value of s. The geom_dotplot in the final plot, p_diff, is wacky and needs certain binwidth values to correctly size the plot. I tried creating a log sequence called binsize and passing it to the parameter. It looks fine at low values of s, but increases to 0.25 too quickly (0.25 works for the final version with 10000 dots).
library(tidyverse)
library(ggtext)
library(patchwork)
library(truncnorm)
library(ggtext)
library(emdbook)
# simulate hypothetical population at control group mean/sd
set.seed(1)
pop <- data.frame(bdi3 = rtruncnorm(10000, a=0, b=63, mean=24.5, sd=10.7),
id = seq(1:10000))
# create plots
diff <- data.frame(NULL)
binsize = lseq(0.08695510, .25, 10000)
for (s in 1:10000) {
set.seed(s)
samp <-
pop %>%
sample_n(332, replace = FALSE)
ctr <-
samp %>%
sample_n(166, replace = FALSE) %>%
mutate(trt = 0)
trt <-
samp %>%
left_join(dplyr::select(ctr, id, trt), by="id") %>%
mutate(trt = ifelse(is.na(trt), 1, trt)) %>%
filter(trt==1)
diff[s,1] <- s
diff[s,2] <- (mean(trt$bdi3)-mean(ctr$bdi3))
names(diff) <- c("id", "diff")
dat <-
ctr %>%
bind_rows(trt)
if (s %in% seq(1, 10000, by=400)) {
# population
p_pop <-
pop %>%
left_join(dplyr::select(dat, id, trt), by="id") %>%
# mutate(trt = ifelse(is.na(trt), 3, trt),
# trt = factor(trt)) %>%
mutate(selected = ifelse(!is.na(trt), 1, 0),
selected = factor(selected)) %>%
ggplot(., aes(x=bdi3, fill=selected, group=id, alpha=selected)) +
geom_dotplot(method = 'dotdensity', binwidth = 0.25, dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "#e69138")) +
scale_alpha_discrete(range = c(0.5, 1)) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(pop$bdi3)+1, y = 25,
label = paste0("Population mean = ",
format(round(mean(pop$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("Imaginary population of 10,000 patients who meet study criteria",
subtitle="<span style='color:#e69138'>**Orange**</span> dots represent 332 selected patients")
p_samp <-
ggplot(dat, aes(x=bdi3)) + # group=id, fill=factor(trt)
geom_dotplot(method = 'dotdensity', binwidth = 1.2,
fill="#e69138", alpha=.8, color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up", stroke=1) +
#scale_fill_manual(values=c("#f7f265", "#1f9ac9")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(dat$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(dat$bdi3)+2, y = 1,
label = paste0("Sample mean = ",
format(round(mean(dat$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("One possible sample of these patients (N=332)",
subtitle="Each dot is a patient sampled from the population who gets randomly assigned to a study arm") +
annotate("text", x = 50, y = .3,
label = "randomize to study arms",
size = 10, color="#696865") +
geom_curve(aes(x = 35, y = .6, xend = 50, yend = .35),
color = "#696865", arrow = arrow(type = "open",
length = unit(0.15, "inches")),
curvature = -.5, angle = 100, ncp =15)
p_ctr <-
ggplot(ctr, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#f7f265", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(ctr$bdi3)+2, y = 1,
label = paste0("Control mean = ",
format(round(mean(ctr$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#f7f265'>**control**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#f7f265'>**yellow**</span>")
p_trt <-
ggplot(trt, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#1f9ac9", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(trt$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(trt$bdi3)+2, y = 1,
label = paste0("Treatment mean = ",
format(round(trt$bdi3, 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#1f9ac9'>**treatment**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#1f9ac9'>**blue**</span>")
p_diff <-
diff %>%
mutate(color=ifelse(diff < -2.3 | diff > 2.3, 1, 0)) %>%
mutate(color=factor(color)) %>%
ggplot(., aes(x=diff, fill=color, group=id)) +
geom_dotplot(method = 'dotdensity', binwidth = binsize[s], dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "red")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(breaks=c(-5:5), limits=c(-5, 5)) +
xlab("\nAverage Treatment Effect (Treatment Mean - Control Mean)") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27)) +
geom_vline(xintercept = 0, linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = 0.2, y = 25, label = "No effect",
hjust = 0, color = "#696865", size=10) +
ggtitle("Simulation based null distribution",
subtitle="Plausible estimates of the treatment effect if the hypothesis of no effect is true") +
geom_vline(xintercept = 2.3, linetype="dotted",
color = "red", size=1) +
geom_vline(xintercept = -2.3, linetype="dotted",
color = "red", size=1) +
annotate("text", x = 2.5, y = 25, label = "Reject null",
hjust = 0, color = "red", size=10) +
annotate("text", x = -2.5, y = 25, label = "Reject null",
hjust = 1, color = "red", size=10) +
annotate("text", x = -5, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10)
p_all <- p_pop / p_samp / (p_trt + p_ctr) / p_diff +
plot_layout(heights = c(2, 2, 1, 2))
ggsave(paste0("animate/", s, ".png"),
height = 40, width = 18.5, units = "in",
dpi = 300)
}
}
The second plot to generate, s==401, looks fine. binsize[401] works for this many dots. But by the 5th plot, s==1601, the dots to not fit. binsize[1601] is too high.
I'm thinking that if I could create a better vector of values for binsize that rises more slowly to 0.25 this will work.
This is more of a maths question rather than a programming question, but there's a fairly simple programming solution.
Here's a simple function you can try. It allows you to produce a sequence of numbers between a starting and ending number just like the lseq function, but includes a shape parameter that controls how "exponential" the numbers appear.
seq_exp <- function(start, stop, n, shape)
{
(stop - start) * exp(seq(0, shape, length.out = n))/exp(shape) + start
}
So you're probably looking for something like this:
plot(seq_exp(0.08, 0.25, 10000, shape = 10))
If you set the shape parameter to 1 it is just a normal exponential curve like in lseq:
plot(seq_exp(0.08, 0.25, 10000, shape = 1))
And of course you can play around with different values:
plot(seq_exp(0.08, 0.25, 10000, shape = 5))
plot(seq_exp(0.08, 0.25, 10000, shape = 30))
Created on 2020-04-01 by the reprex package (v0.3.0)
I am using ggplot2 to plot bar chart. I want to add labels over bars in bar chart but the label name of the bar with the highest value gets hidden. I tried to set margin but the label value still not get visible.
library(ggplot2)
x <- c(1:27)
y <- c(988,1195,804,574,414,309,234,196,169,125,95,73,57, 63 ,31,32 ,28 ,29 ,37 ,37 ,21 ,20 ,5,4,2,1,4)
z <- c(11233,7856,5926,4615,3714, 3037, 2548, 2156, 1842, 1610, 1436, 1302, 1177,1066 ,1000 ,936,882,828,760,697,659,621,611,603,599,597,591)
dat <- data.frame(x,y,z)
g <- ggplot(dd, aes(x = dat$x, y = dat$y)) +
geom_bar(stat = "identity", colour = "black", fill = "pink", width = .5,
position = position_dodge()) +
geom_text(aes(label = dat$z, hjust = 0), position = position_dodge(width = 0.9),
angle = 90)
g
g + theme_bw() +
theme(panel.grid.major = element_blank(),
panel.background = element_blank(),
plot.margin=unit(c(0.5, 1, 1, 2), "lines")) +
scale_y_continuous(expand = c(0, 0))
And the barplot I get is
Or adjust the limits:
+ scale_y_continuous(expand = c(0,0), limits = c(0, max(y) * 1.15))
Try using a different value for the expand parameter within scale_y_continuous:
g <- ggplot(dat, aes(x = x, y = y)) +
geom_bar(stat = "identity", colour = "black", fill="pink", width = .5,
position = position_dodge()) +
geom_text(aes(label = z, hjust=0),
position = position_dodge(width = 0.9), angle = 90) +
scale_y_continuous(expand = c(0.15, 0))
g
I'm interested in ways to only include panel grid lines right near the ribbon--I can do this manually, in a trivial example
library(ggplot2)
d1 <- data.frame(x = seq(0, 1, length.out = 200))
d1$y1 <- -3*(d1$x-.5)^2 + 1
d1$y2 <- -3*(d1$x-.5)^2 + 2
ggplot(d1) +
geom_ribbon(aes(x, ymin = y1, ymax = y2),
alpha = .25) +
geom_ribbon(aes(x, ymax = y1),
ymin = .25,
fill = "white") +
geom_ribbon(aes(x, ymin = y2),
ymax = 2,
fill = "white") +
scale_y_continuous(limits = c(.25, 2.0),
expand = c(0, 0))+
scale_x_continuous(limits = c(0, 1),
expand = c(0, 0))+
theme_bw() +
theme(panel.grid = element_line(linetype = 1, color = "black"))
is there some less hacky way to have a transparent mask for these gridlines, so they only appear underneath a ribbon?
If gridlines the same color as the background are acceptable, you can remove the actual gridlines, then use geom_hline() and geom_vline() to make your own "gridlines" that will show on ribbons but be invisible against the background
d1$y3 <- d1$x + 0.3
d1$y4 <- d1$x + 0.4
ggplot(d1) +
geom_ribbon(aes(x, ymin = y1, ymax = y2), alpha = 0.25) +
geom_ribbon(aes(x, ymin = y3, ymax = y4), alpha = 0.25, fill = "blue") +
# use geom_vline and geom_hline to plot "gridlines" on top of ribbons
geom_hline(yintercept = seq(0, 2, by = 0.25), colour = "white") +
geom_vline(xintercept = seq(0, 1, by = 0.25), colour = "white") +
scale_y_continuous(limits = c(.25, 2.0), expand = c(0, 0)) +
scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
theme_bw() +
theme(panel.grid.minor = element_blank(), # remove actual gridlines
panel.grid.major = element_blank())
produces this:
This is still a workaround, and will only make gridlines that match the background color, but it is easy to use with a variety of plots, such as the situation you mentioned with multiple ribbons (I've added a second ribbon to demonstrate that this will work)