Incorrect p-value position on ggplots using rstatix - r

I am having trouble placing the p-values in the correct position on the y axis of a ggplot using rstatix. I can get the example provided on the package author's blog to work fine, but when I change the values, the positions are incorrect. Here is the working version:
library(tidyverse)
library(rstatix)
##Example provided by the package author which works correctly
df <- ToothGrowth%>%
as_tibble()
#Check df
df
#Stats calculation
stat.test <- df %>%
group_by(dose) %>%
t_test(len ~ supp) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Make facet and add p-values
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max")
#Check p value positions - y.position looks good
stat.test
#Plot
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
However, when I change the values, the position of the p values are too high.
## My example which plots incorrectly
##--- This is a very inelegant way to change the values!!
df <- ToothGrowth %>%
mutate(helper = paste0(supp, dose))
df$RecordingNo <- ave(seq.int(nrow(df)), df$helper, FUN = seq_along)
df <- df %>%
select(-helper) %>%
pivot_wider(names_from = c(dose), values_from = len) %>%
mutate(`0.5` = `0.5` * 0.1) %>%
mutate(`2` = `2` * 10) %>%
select(-RecordingNo) %>%
pivot_longer(-supp) %>%
rename(len = value, dose = name) %>%
mutate(dose = as_factor(dose)) %>%
as_tibble()
#Check df
df
##------
#This code is exactly the same as the working code above.
#Stats calculation
stat.test <- df %>%
group_by(dose) %>%
t_test(len ~ supp) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Make facet and add p-values
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max")
#Check p value positions - y.position looks incorrect
stat.test
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
I guess there is a difference in the second dataframe which is causing the problems, but I can't figure it out. Thanks!

Like the scales option on facet_wrap, there is a scales option on add_xy_position that controls the p value position . As I am using "facet_wrap(...,scales = "free")" I should use add_xy_position(...,scales = "free") to make sure the positions match.
In my example:
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max",scales = "free")
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
Answer from author's Github page.

Related

change guide_legend order in ggplotly [duplicate]

I would like to reverse the order of the legend for a horizontal bar chart. When adding guides(fill = guide_legend(reverse = TRUE)) to the ggplot it works fine (see second plot). However, after applying ggplotly() the legend is again in the default order.
How to reverse the order of the plotly legend without changing the order of the bars?
library(ggplot2)
library(dplyr)
data(mtcars)
p1 <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p1
p2 <- p1 + guides(fill = guide_legend(reverse = TRUE))
p2
plotly::ggplotly(p2)
Adding to the great answer of #Zac Garland here is a solution that works with legends of arbitrary length:
library(ggplot2)
library(dplyr)
reverse_legend_labels <- function(plotly_plot) {
n_labels <- length(plotly_plot$x$data)
plotly_plot$x$data[1:n_labels] <- plotly_plot$x$data[n_labels:1]
plotly_plot
}
p1 <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p2 <- mtcars %>%
count(am, cyl) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(am, n, fill = cyl)) +
geom_col(position = "dodge") +
coord_flip()
p1 %>%
plotly::ggplotly() %>%
reverse_legend_labels()
p2 %>%
plotly::ggplotly() %>%
reverse_legend_labels()
When you call ggplotly, it's really just creating a list and a function call on that list.
So if you save that intermediate step, you can modify the list directly. and as such, modify the plot output.
library(ggplot2)
library(dplyr)
data(mtcars)
p1 <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(am)) %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
html_plot <- ggplotly(p1)
replace_1 <- html_plot[["x"]][["data"]][[2]]
replace_2 <- html_plot[["x"]][["data"]][[1]]
html_plot[["x"]][["data"]][[1]] <- replace_1
html_plot[["x"]][["data"]][[2]] <- replace_2
html_plot
plot output
A simple solution is to define the order of the levels of the factor variable am:
library(ggplot2)
library(dplyr)
data(mtcars)
df <- mtcars %>%
count(cyl, am) %>%
mutate(cyl = factor(cyl), am = factor(as.character(am), levels = c("1", "0")))
head(df)
p1 <- df %>%
ggplot(aes(cyl, n, fill = am)) +
geom_col(position = "dodge") +
coord_flip()
p1
plotly::ggplotly(p1)

gganimate:combine barplot with lineplot

library(ggplot2)
library(tidyverse)
library(gganimate)
set.seed(321)
year <- 2001:2021
value <- runif(n = length(year))*100%>% round(digits = 2)
df <- data.frame(year, value) %>%
mutate(cumsum = cumsum(value))
df %>%
ggplot(aes(x = year)) +
geom_col(aes(y = value)) +
geom_line(aes(y = cumsum)) +
transition_states(year) +
shadow_mark()
How to get an animate for both bar and line plots along the years, there is an error with the code above.
Error in transform_path(all_frames, next_state, ease, params$transition_length[i], :
transformr is required to tween paths and lines
Install transformr() package
library(ggplot2)
library(tidyverse)
library(gganimate)
library(transformr)
set.seed(321)
year <- 2001:2021
value <- runif(n = length(year))*100%>% round(digits = 2)
df <- data.frame(year, value) %>%
mutate(cumsum = cumsum(value))
df %>%
ggplot(aes(x = year)) +
geom_col(aes(y = value)) +
geom_line(aes(y = cumsum)) +
transition_states(year) +
shadow_mark()

How to subset data in a ggplot panel chart?

I am trying create a panel chart in ggplot with four variables which all have their own scale for the y axis. I can get the structure of the panel chart to work but am having trouble actually getting each data set onto the gird. I have been following a script I found online. See below however I am getting the following error when I try and use the subset function further down in the script.
Error in .(variable == "Count") : could not find function "."
#load data
#Data source: data analysis-gullies > R Stats Input > Panel Chart
df <- read.csv(file.choose(), header = T)
View(df)
#load library
library(ggplot2)
library(reshape2)
dfm <- melt(df, id.vars =c("Interval"))
View(dfm)
test <- ggplot(dfm, aes(Interval, value, ymin = 0,
ymax = value, colour = "grey20"))+ scale_colour_identity() +
xlim(5,1115)+ facet_grid(variable ~ ., scales = "free", as.table = FALSE)+
theme_bw() + theme(panel.spacing = unit(0, "lines"), axis.title.x = element_blank(),
axis.title.y = element_text())
test
test1 <- test + geom_col(subset = .(variable == "Count"))
test2 <- test1 + geom_col(subset = .(variable == "Length"))
test3 <- test2 + geom_col(subset = .(variable == "Area"))
test4 <- test3 + geom_col(subset = .(variable == "Volume"))
You can use the patchwork package to merge individual ggplot2 objects to get individual axes for each panel:
library(tidyverse)
library(patchwork)
iris %>%
nest(-Species) %>%
mutate(
plt = data %>% map2(Species, ~ {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_point() +
labs(title = .y)
})
) %>%
pull(plt) %>%
wrap_plots()
You can also add logic to plot different plots per panel:
library(tidyverse)
library(patchwork)
iris %>%
nest(-Species) %>%
mutate(
plt = data %>% map2(Species, ~ {
if(.y == "setosa") {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_point() +
labs(title = .y)
} else {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_line() +
labs(title = .y)
}
})
) %>%
pull(plt) %>%
wrap_plots()
If the panel plots are very different from each other (e.g. different variables for the x and y axes), it is recommended to create each plot individually and then call wrap_plots of all the plot objects:
plt1 <- qplot(Sepal.Length, Sepal.Width, data = iris, geom = "point")
plt2 <- qplot(Petal.Length, Petal.Width, data = iris, geom = "line")
wrap_plots(plt1, plt2, nrow = 1)

How to sort bars after grouped top_n in facet_wrap with ggplot2?

I'm facing an issue with sorting bars when using facet_wrap (which is commonly reported here, here and others) after group variables and get top values.
When I run the code without factor conversion, bars are ordered:
iris %>%
gather(key = measurements, value = values, - Species) %>%
mutate(kk = factor(measurements, levels = unique(.$measurements)),
species_l = with(., paste(Species, .$measurements, sep = "_"))) %>%
ggplot(aes(x = reorder(species_l, values),
y = values,
fill = kk)) +
geom_bar(stat = "identity") +
facet_wrap(.~kk,
scales = "free")
But now I want to order decreasingly bars within facet_wrap and after top_n.
Heres is what I've tried so far:
library(tidyverse)
iris %>%
gather(key = measurements, value = values, - Species) %>%
within(.,
Species <- factor(Species,
levels=names(sort(table(Species),
decreasing=FALSE)))) %>%
ggplot(aes(x = Species,
y = values,
fill = measurements)) +
geom_bar(stat = "identity") +
facet_wrap(.~ measurements,
scales = "free")
and this:
iris %>%
gather(key = measurements, value = values, - Species) %>%
group_by(measurements, Species) %>%
top_n(5, wt = values) %>%
ggplot(aes(x = reorder(Species, Species,
function(x)-length(x)),
y = values,
fill = measurements)) +
geom_bar(stat = "identity") +
facet_wrap(.~measurements,
scales = "free")
and this:
iris %>%
gather(key = measurements, value = values, - Species) %>%
mutate(kk = factor(measurements, levels = unique(.$measurements)),
species_l = with(., paste(Species, .$measurements, sep = "_"))) %>%
group_by(measurements, Species) %>%
top_n(5, wt = values) %>%
ungroup() %>%
ggplot(aes(x = reorder(species_l, values),
y = values,
fill = kk)) +
geom_bar(stat = "identity") +
facet_wrap(.~kk,
scales = "free")
This is what I get:
As you can see Sepal.Width bars are not sorted.
Your first attempt was close -- you need to make sure you're reordering per facet, and not just reordering the factor based on the top 5 values of all measurements. Julia Silge explains thoroughly here
library(tidytext)
library(tidyverse)
library(magtrittr)
iris %>%
gather(key = measurements, value = values, - Species) %>%
mutate(kk = factor(measurements, levels = unique(.$measurements)),
#The '-values' below specifies to order in descending
Species = reorder_within(Species, -values, measurements)) %>%
ggplot(aes(x = Species, y = values, fill = kk)) +
geom_bar(stat = "identity") +
facet_wrap(.~kk, scales = "free") +
scale_x_reordered()`

How to create custom color palette to be used by scale_fill_manual()

Consider the following code that makes a bar chart with a purple color palette
library(dplyr)
library(ggplot2)
dd <- mpg %>%
group_by(manufacturer, cyl) %>%
summarise(n = n()) %>%
ungroup()
mm <- dd %>%
group_by(manufacturer) %>%
summarise(mcyl = weighted.mean(cyl, n)) %>%
arrange(mcyl) %>%
ungroup()
dd %>% left_join(mm) %>%
ggplot(mapping = aes(x = reorder(manufacturer, mcyl), y = n, fill = factor(cyl))) +
geom_bar(stat = "identity", position = "fill") +
coord_flip() +
scale_fill_brewer(palette = "Purples")
Question: How can I make the palette for Audi red ("Reds") and for Ford blue ("Blues"), while keeping the others purple ("Purples")?
What is the most convenient (preferably tidyverse) way to put these red/blue/purple palettes in a variable and passing it to scale_fill_manual() (as explained in this related Q&A)?
Full working solution:
cyl <- sort(unique(mpg$cyl))
ncat <- length(cyl) # 4 types of cylinders
# create palettes
library(RColorBrewer)
purples <- tibble(cyl, colr = brewer.pal(ncat, "Purples"))
reds <- tibble(manufacturer = "audi", cyl, colr = brewer.pal(ncat, "Reds"))
blues <- tibble(manufacturer = "ford", cyl, colr = brewer.pal(ncat, "Blues"))
# merge them with the data
dd_p <- dd %>% filter(!(manufacturer %in% c("audi", "ford"))) %>% left_join(purples)
dd_r <- dd %>% filter(manufacturer == "audi") %>% left_join(reds)
dd_b <- dd %>% filter(manufacturer == "ford") %>% left_join(blues)
gg_dd <- rbind(dd_p, dd_r, dd_b) %>%
left_join(mm)
gg_dd %>%
ggplot(mapping = aes(x = reorder(manufacturer, mcyl), y = n, fill = colr)) +
geom_bar(stat = "identity", position = "fill") +
coord_flip() +
scale_fill_identity()

Resources