Related
I have a data frame with three groups (group1, group2, group3). I would like to show the p-values of their pairwise mean comparisons in ggplot2. I can do that, but the values are stacked on top of one another, making it difficult to see what is being compared. When I try to adjust where the p-values are located using the y_position argument of geom_signif(), the p-values are no longer stacked on top of one another, but the boxplots collapse (I think because the y-axis is log10). How can I keep the boxplots from collapsing and still display the p-values so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)
# Reproducible example data: three groups of 10 exponentially distributed
# values with rates 1/10, 1/100 and 1/900.
set.seed(123) # fix the RNG so the example yields the same data on every run
df <- data.frame(
  group = rep(c("group1", "group2", "group3"), each = 10),
  value = c(rexp(10, 1 / 10), rexp(10, 1 / 100), rexp(10, 1 / 900))
)

# Need to say what should be compared for p-value determination:
# every pair of groups, one list element per comparison.
my_comparisons <- list(
  c("group1", "group2"),
  c("group1", "group3"),
  c("group2", "group3")
)
Boxplots showing the distribution of value for each group; however, the p-values are on top of one another, so you cannot tell which groups are being compared.
# Box plots of `value` per group on a log10 y axis with pairwise p-values.
# No y_position is supplied to geom_signif() in this version.
df %>%
  mutate(group = factor(group, levels = c("group3", "group2", "group1"))) %>%
  ggplot(aes(x = group, y = value)) +
  geom_signif(
    comparisons = my_comparisons,
    map_signif_level = function(x) paste("p =", scales::pvalue(x))
  ) +
  scale_y_log10() +
  # Box-plot outliers are drawn white at size 0; the jitter layer below draws
  # every observation.
  geom_boxplot(
    outlier.shape = 1, outlier.size = 0,
    outlier.colour = "white", outlier.fill = "white"
  ) +
  geom_jitter(
    position = position_jitter(0.2),
    shape = 1, size = 2, color = "black", fill = "white"
  ) +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    panel.background = element_blank(),
    panel.grid = element_blank()
  )
Adjusting where the p-values are displayed with the y_position argument un-stacks them, but it collapses the y-axis. I have tried several values for y_position.
# Same plot as before, but with explicit bracket heights passed to
# geom_signif() as raw data values (2000, 1800, 1600).
df %>%
  mutate(group = factor(group, levels = c("group3", "group2", "group1"))) %>%
  ggplot(aes(x = group, y = value)) +
  geom_signif(
    comparisons = my_comparisons,
    y_position = c(2000, 1800, 1600),
    map_signif_level = function(x) paste("p =", scales::pvalue(x))
  ) +
  scale_y_log10() +
  # Box-plot outliers are drawn white at size 0; the jitter layer below draws
  # every observation.
  geom_boxplot(
    outlier.shape = 1, outlier.size = 0,
    outlier.colour = "white", outlier.fill = "white"
  ) +
  geom_jitter(
    position = position_jitter(0.2),
    shape = 1, size = 2, color = "black", fill = "white"
  ) +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    panel.background = element_blank(),
    panel.grid = element_blank()
  )
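The y_position argument is not passed through the axis transformation, so its values are interpreted directly on the log10 scale (y_position = 2000 is read as 10^2000, which is why the boxplots collapse). You therefore need to supply the log10 values of the desired positions: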
# Box plots of `value` per group on a log10 y axis with pairwise p-values.
# The bracket heights are given as log10() of the desired data values because
# the y axis is log10-transformed.
df %>%
  mutate(group = factor(group, levels = c("group3", "group2", "group1"))) %>%
  ggplot(aes(x = group, y = value)) +
  geom_signif(
    comparisons = my_comparisons,
    y_position = log10(c(5000, 10000, 25000)),
    map_signif_level = function(x) paste("p =", scales::pvalue(x))
  ) +
  scale_y_log10() +
  # Box-plot outliers are drawn white at size 0; the jitter layer below draws
  # every observation.
  geom_boxplot(
    outlier.colour = "white", outlier.fill = "white",
    outlier.shape = 1, outlier.size = 0
  ) +
  geom_jitter(
    shape = 1, position = position_jitter(0.2), color = "black",
    fill = "white", size = 2
  ) +
  labs(x = "", y = "value") +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 16, color = "black"),
    axis.text.y = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 16, color = "black"),
    axis.title.x = element_text(vjust = -0.5),
    panel.grid = element_blank(),
    panel.background = element_blank()
  )
I am trying to change the order of the slices in a pie chart in R. I want the largest value (38, which belongs to Agricultural intensification) on the right of 0 degrees, the second largest slice (20, belongs to Deforestation) on the left, the third largest value (17, Urbanization) as second one on the right, and the lowest value (10, Wetland or river modification) as second one on the left.
I used the code written out below. Thank you so much for helping me out!
Kind regards,
Stefanie
Pie chart
# Pie chart of four drivers; `value` holds the percentage shown on each slice.
df <- data.frame(
  value = c(38, 20, 17, 10),
  group = c("Agricultural intensification", "Deforestation", "Urbanization", "Wetland or river modification")
)
library(ggplot2)
ggplot(df, aes(x = "", y = value, fill = group)) +
  labs(x = "Taxon order", y = "Driver", title = "Driver per taxon order") +
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold")) +
  geom_col(color = "black") +
  coord_polar("y", start = 0) +
  # Label each slice with its own `value`; the previous `slices` object was
  # never defined, so the label aesthetic could not be evaluated.
  geom_text(aes(label = paste0(value, "%")), position = position_stack(vjust = 0.5)) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme(
    axis.line = element_line(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), panel.border = element_blank(),
    panel.background = element_blank()
  ) +
  theme_void() +
  scale_fill_viridis_d()
Convert your group column to a factor and set the levels in your desired order:
# Pie chart of four drivers; `value` holds the percentage shown on each slice.
df <- data.frame(
  value = c(38, 20, 17, 10),
  group = c("Agricultural intensification", "Deforestation", "Urbanization", "Wetland or river modification")
)
library(ggplot2)

# The slice order follows the factor levels of `group`. The levels are the
# desired order (Agricultural intensification, Urbanization, Wetland or river
# modification, Deforestation) reversed.
df$group <- factor(df$group, levels = rev(c(
  "Agricultural intensification", "Urbanization",
  "Wetland or river modification", "Deforestation"
)))

ggplot(df, aes(x = "", y = value, fill = group)) +
  geom_col(color = "black") +
  coord_polar("y", start = 0) +
  geom_text(aes(label = paste0(value, "%")), position = position_stack(vjust = 0.5)) +
  scale_fill_viridis_d() +
  labs(title = "Driver per taxon order", x = NULL, y = NULL, fill = NULL) +
  # theme_void() is a complete theme: it replaces any theme() settings added
  # before it, so it has to come first and the title styling afterwards.
  theme_void() +
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "bold"))
I have a 100% stacked bar chart that displays 3 types of variable. I've set up an example data set so I could create the graph more easily.
I've already adjusted the chart with the colors and information I need, but I'm not able to position the labels independently. Here's the current code and output.
Code:
# 100% stacked horizontal bars per Year. The outer parentheses print the plot
# while assigning it to `empilhado`; the matching closing parenthesis sits at
# the very end of the expression.
(empilhado <- ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
  scale_x_continuous(
    sec.axis = sec_axis(trans = ~ . * 1, name = "Trab."),
    expand = expansion(mult = c(0, 0.05))
  ) +
  # Background bar and label built from the separate `rotulo` data set.
  geom_col(
    data = rotulo, aes(y = Year, x = abs(trabalho), fill = NULL),
    width = .7, colour = "black", lwd = 0.1, position = "fill", orientation = "y"
  ) +
  geom_label(
    data = rotulo,
    aes(
      y = Year, x = abs(trabalho), fill = NULL,
      label = paste(
        format(round(trabalho, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = "."),
        format(round(aprovado, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = ".")
      )
    ),
    color = "black", size = 4, position = position_fill(vjust = 1.06)
  ) +
  # Stacked bars for `dfm`, each segment labelled at its centre (vjust = 0.5).
  geom_col(width = .7, colour = "black", lwd = 0.1, position = "fill", orientation = "y") +
  geom_text(
    aes(label = format(round(value, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = ".")),
    size = 4, color = "white", position = position_fill(vjust = 0.5)
  ) +
  theme(
    panel.grid.major = element_line(colour = "gray90", size = 0.75),
    panel.grid.minor = element_line(colour = "gray90", size = 0.75),
    legend.position = "top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    axis.title.x = element_blank(), panel.background = element_blank()
  ) +
  scale_fill_manual(values = c("#000000", "tomato", "blue")))
Output:
How is it now? The labels use position_fill(vjust = 0.5), so every label is centered inside its respective bar segment.
What I want? To be able to set the position of the 'Marionete' label on the left(like a vjust=0 would do), keep the 'Pedido' label as is (in the center of the 'Pedido' stacked bar) and place the 'Fatura' label on the right (like a vjust=1 would do).
Thanks in advance!
One option to achieve your desired result would be to compute/set the positions for each label and the horizontal alignment manually instead of making use of position="fill":
Making use of some random mock data:
library(ggplot2)
library(dplyr)
# Compute, per Year, the x position and horizontal alignment of every label:
# "Marionete" is pinned to the left edge (0), "Pedido" to the centre of its own
# segment, and everything else to the right edge (1).
dfm <- dfm %>%
  group_by(Year) %>%
  arrange(desc(variable)) %>%
  mutate(
    # Share of the bar, based on abs(value) to match what the bars plot.
    pct = abs(value) / sum(abs(value)),
    x_label = case_when(
      variable == "Marionete" ~ 0,
      # Midpoint between the cumulative share before and after this segment.
      variable == "Pedido" ~ .5 * (cumsum(pct) + lag(cumsum(pct))),
      TRUE ~ 1
    ),
    hjust = case_when(
      variable == "Marionete" ~ 0,
      variable == "Pedido" ~ .5,
      TRUE ~ 1
    )
  ) %>%
  ungroup() # do not leave `dfm` grouped for later code

ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
  scale_x_continuous(sec.axis = sec_axis(trans = ~ . * 1, name = "Trab."), expand = expansion(mult = c(0, 0.05))) +
  geom_col(width = .7, colour = "black", lwd = 0.1, position = "fill", orientation = "y") +
  # Labels use the precomputed positions instead of position = "fill".
  geom_text(aes(x = x_label, label = format(round(value, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = "."), hjust = hjust),
    size = 4, color = "white"
  ) +
  theme(
    panel.grid.major = element_line(colour = "gray90", size = 0.75), panel.grid.minor = element_line(colour = "gray90", size = 0.75),
    legend.position = "top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    axis.title.x = element_blank(), panel.background = element_blank()
  ) +
  scale_fill_manual(values = c("#000000", "tomato", "blue"))
DATA
# Mock data: one row per (Year, variable) for the years 2006-2016, with values
# sampled from 1..100. Year and variable are both stored as factors.
set.seed(123)
dfm <- data.frame(
  Year = factor(rep(2006:2016, each = 3)),
  value = sample(seq_len(100), size = 33, replace = TRUE),
  variable = factor(
    rep(c("Fatura", "Pedido", "Marionete"), times = 11),
    levels = c("Fatura", "Pedido", "Marionete")
  )
)
I am trying to plot a graph that has two different color scales. The first one is for the y-axis labels and the second one is for the geom_points. Here is a sample from my data and the code I am working on.
library(tidyverse)
library(readxl)
library(scales)
library(ggplot2)
# Sample data: 39 observations across five variables. The label columns are
# written as run lengths; `value` is listed one line per variable.
variable <- rep(
  c("Var1", "Var18", "Var30", "Var4", "Var63"),
  times = c(10, 9, 5, 10, 5)
)
Type <- rep(
  c("Type1", "Type2", "Type3", "Type2"),
  times = c(10, 9, 5, 15)
)
value <- c(
  8422, 6795, 5845, 7348, 5184, 1035, 481, 401, 873, 1616,
  7.39099, 9.913625, 25.8478505, 6.2130315, 1.3530115, 0.2089435, 0.041442, 5.855607, 0.5863545,
  0.058472901, 0.063211708, 0.282500045, 0.340304871, 0.101131024,
  104, 283, 136, 148, 326, 65, 63, 77, 71, 92,
  0.001131, 9.20E-05, 0.000267, 0.000284, 3.50E-05
)
Class <- rep(
  c("Class1", "Class2", "Class1", "Class2", "Class1", "Class2", "Class3"),
  times = c(5, 5, 4, 10, 5, 5, 5)
)
dat <- data.frame(variable, Type, value, Class)
# Attach the per-variable maximum of `value` to every row as `upper`.
dat<- dat %>% group_by(variable) %>% mutate(upper = max(value))
# One row per variable/Type combination with its maximum, sorted by `upper`
# in descending order.
dat.col <- dat %>% group_by(variable, Type) %>% summarise(upper = max(value)) %>% arrange(desc(upper))
# Map each Type to an axis-label colour (Type1 = blue, Type2 = purple,
# anything else = black), in the row order of dat.col.
type_filter <- ifelse(dat.col$Type == "Type1", "blue", ifelse(dat.col$Type == "Type2", "purple", "black"))
names(type_filter) <- as.character(dat.col$variable)
# Dot plot of value (log10 x axis) per variable; the y axis is ordered by
# reorder(variable, upper).
p1 <- ggplot(dat, aes(value, reorder(variable, upper)))+
geom_point(aes(color=Class), size = 1, show.legend = TRUE)+
# alpha = 0 makes this text layer invisible; it maps Type onto the colour scale.
geom_text(aes(label = Type, color = Type), alpha = 0)+
scale_color_manual(values = c("Class3" = "grey40", "Class2" = "red", "Class1" = "chartreuse4",
"Type1" ="blue", "Type2"="purple", "Type3" = "black"),
# NOTE(review): the doubled parentheses make `override.aes = list(...)` an
# assignment expression passed positionally, not the named override.aes
# argument - confirm intent.
guide = guide_legend((override.aes = list(alpha = 1))))+
scale_x_log10(breaks = c(1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5),
labels = trans_format("log10", math_format(10^.x)))+
labs(x="xlab", y = "ylab", face = "bold")+
guides(colour = guide_legend(override.aes = list(size=3)))+
theme_bw(base_family = "serif", base_size = 12)+
theme(legend.position = c(0.20, 0.90),
legend.title = element_blank(),
legend.background = element_blank(),
legend.box.background = element_rect(colour = "black"),
panel.grid.minor=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(linetype = "dotted", color = "black", size = 0.4),
# NOTE(review): type_filter is passed positionally in dat.col's descending
# `upper` order; verify that this matches the order of the y-axis labels.
axis.text.y = element_text(face = "bold", color = type_filter),
axis.text.x = element_text(face = "bold"))
print(p1)
The output of this code fails to assign the correct colors to the y-axis labels. As you can see from the image below, Var1, Var4, and Var63 have the wrong colors. Var18 and Var30 are correct; however, I think that is a coincidence. This is the best I can do. Even if I remove the geom_point layer and its color scale, the y-axis labels still get the wrong colors.
How can I fix this?
Your colors are reversed - ggplot assumes that Var63 is the lowest y-value and Var1 the highest. Reversing the order of type_filter should do the trick - e.g.:
axis.text.y = element_text(face = "bold", color = rev(type_filter)),
Just need to reverse your order in the axis.text.y call:
# Dot plot of value (log10 x axis) per variable, with y-axis labels coloured
# by Type. The y axis is ordered by reorder(variable, upper), so the label
# colours are passed as rev(type_filter).
p1 <- ggplot(dat, aes(value, reorder(variable, upper))) +
  geom_point(aes(color = Class), size = 1, show.legend = TRUE) +
  # alpha = 0 makes this text layer invisible in the panel; it maps Type onto
  # the colour scale.
  geom_text(aes(label = Type, color = Type), alpha = 0) +
  scale_color_manual(
    values = c(
      "Class3" = "grey40", "Class2" = "red", "Class1" = "chartreuse4",
      "Type1" = "blue", "Type2" = "purple", "Type3" = "black"
    ),
    # One legend guide carrying both overrides. Previously doubled parentheses
    # turned `override.aes = list(alpha = 1)` into an assignment passed
    # positionally, and a later guides(colour = ...) call replaced this guide.
    guide = guide_legend(override.aes = list(alpha = 1, size = 3))
  ) +
  scale_x_log10(
    breaks = c(1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5),
    labels = trans_format("log10", math_format(10^.x))
  ) +
  # `face` is not a plot label, so it is no longer passed to labs().
  labs(x = "xlab", y = "ylab") +
  theme_bw(base_family = "serif", base_size = 12) +
  theme(
    legend.position = c(0.20, 0.90),
    legend.title = element_blank(),
    legend.background = element_blank(),
    legend.box.background = element_rect(colour = "black"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(linetype = "dotted", color = "black", size = 0.4),
    axis.text.y = element_text(face = "bold", color = rev(type_filter)), ## REVERSE ORDER ##
    axis.text.x = element_text(face = "bold")
  )
print(p1)
I created a vector of ordered names and tried to replace each panel title with the ordered one (e.g., Jessie with 1.Jessie, Marion with 2.Marion, etc.). However, I am getting NAs for each panel title instead. Any hints as to what is going wrong?
With the NAs
With the labeller commented out
# Prefix each of the 35 names with its position ("<order>.<name>") and keep
# the result as a plain vector.
list.top.35.names.ordered <- local({
  numbered <- data.frame(cbind(order = seq_len(35), list.top.35.names))
  numbered <- unite(numbered, name.new, c("order", "list.top.35.names"), sep = ".")
  numbered$name.new[seq_len(35)]
})
str(list.top.35.names.ordered)
chr [1:35] "1.Jessie" "2.Marion" "3.Jackie" "4.Alva" "5.Trinidad" "6.Ollie" ...
# Stacked area of `perc` by `sex` over `year`, one facet per name (7 x 5 grid),
# with a marker layer drawn from `most.unisex.year.and.value`.
data.babyname.all %>%
  ggplot(mapping = aes(x = year, y = perc, fill = sex)) +
  geom_density(stat = "identity", position = "stack", show.legend = FALSE) +
  # NOTE(review): as_labeller() uses a character vector as a lookup table keyed
  # by facet value; this vector appears to be unnamed, which is presumably why
  # the strip titles come out as NA - confirm, and name it by the original
  # `name` values if so.
  facet_wrap(~name, ncol = 7, nrow = 5, labeller = as_labeller(list.top.35.names.ordered)) +
  scale_fill_manual(values = c("#E1AEA1", "#9ABACF")) +
  geom_point(
    data = most.unisex.year.and.value, mapping = aes(x = year, y = perc),
    size = 3,
    fill = "white",
    color = "black",
    shape = 21
  ) +
  # Top label reads "100%" to match the "0%" / "50%" format of the others.
  scale_y_continuous(breaks = c(0, .50, 1), labels = c("0%", "50%", "100%")) +
  scale_x_continuous(breaks = c(1940, 1960, 1980, 2000), labels = c("1940", "'60", "'80", "2000")) +
  geom_text(
    mapping = aes(x = x, y = y, label = label),
    check_overlap = FALSE, na.rm = TRUE, position = position_dodge(width = .9), size = 3
  ) +
  theme_minimal() + # set theme
  theme(
    text = element_text(size = 10),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    plot.background = element_blank(),
    axis.ticks.x = element_line(color = "black"),
    axis.ticks.length = unit(.2, "cm"),
    strip.text = element_text(size = 10, margin = margin(l = 10, b = .1))
  )