I need to pass column names to the plotting function. The problem is that my column names are in the arguments and I don't know how to extract those as text.
Here is code that works, but I need to get the strings dynamically from the arguments.
create_plot <- function(df, group, subgroup, y) {
  # Capture the bare column arguments so they can be unquoted with !! below.
  group_q <- enquo(group)
  subgroup_q <- enquo(subgroup)
  y_q <- enquo(y)

  # Keep the three columns of interest and turn both grouping columns
  # into factors, overwriting them in place.
  plot_data <- select(df, !!group_q, !!subgroup_q, !!y_q)
  plot_data <- mutate(
    plot_data,
    !!group_q := as.factor(!!group_q),
    !!subgroup_q := as.factor(!!subgroup_q)
  )

  ggsummarystats(
    plot_data,
    x = "COUNTRY", y = "VALUE", # Need to get these from the arguments
    palette = custom_pal,
    ggfunc = ggboxplot,
    color = "YEAR", # Same here
    fill = "YEAR", # And here
    summaries = c("n", "mean")
  )
}

create_plot(sales, YEAR, COUNTRY, VALUE)
Example using mtcars:
create_plot <- function(df, group, subgroup, y) {
  # Quosures for the three bare column names passed by the caller.
  q_group <- enquo(group)
  q_subgroup <- enquo(subgroup)
  q_y <- enquo(y)

  df %>%
    select(!!q_group, !!q_subgroup, !!q_y) %>%
    mutate(
      # Both grouping columns are converted to factors before plotting.
      !!q_group := as.factor(!!q_group),
      !!q_subgroup := as.factor(!!q_subgroup)
    ) %>%
    ggsummarystats(
      .,
      # Column names are still hard-coded here (the mtcars equivalents of
      # the arguments).
      x = "carb",
      y = "mpg",
      palette = custom_pal,
      ggfunc = ggboxplot,
      fill = "gear",
      color = "gear",
      summaries = c("n", "mean")
    )
}

create_plot(mtcars, gear, carb, mpg)
You can use rlang::as_name() for that:
library(tidyverse)
library(rlang)
library(ggpubr)

#' Boxplot with a summary table, driven by bare column names
#'
#' @param df A data frame.
#' @param group Bare name of the column used for colour and fill.
#' @param subgroup Bare name of the column placed on the x axis.
#' @param y Bare name of the column placed on the y axis.
#' @return Whatever `ggsummarystats()` returns for the selected columns.
create_plot <- function(df, group, subgroup, y) {
  var_group <- enquo(group)
  var_subgroup <- enquo(subgroup)
  var_y <- enquo(y)

  df %>%
    select(!!var_group, !!var_subgroup, !!var_y) %>%
    mutate(
      !!var_group := as.factor(!!var_group),
      !!var_subgroup := as.factor(!!var_subgroup)
    ) %>%
    ggsummarystats(
      # The plotting call is given column names as strings; as_name()
      # turns each captured argument into its name. The mapping follows
      # the hard-coded version in the question: subgroup on the x axis,
      # group for colour and fill.
      x = as_name(var_subgroup),
      y = as_name(var_y),
      color = as_name(var_group),
      fill = as_name(var_group),
      ggfunc = ggboxplot,
      summaries = c("n", "mean")
    )
}

create_plot(mtcars, gear, carb, mpg)
Created on 2021-06-14 by the reprex package (v1.0.0)
Related
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 2 years ago.
Improve this question
I want to create and save all the possible geom_col charts of a dataset with categorical variables on Y axis and average (grouping variable being y) value of numeric variables on x axis.
I borrowed ideas from this and this link and created the following code, but I am struggling to make it work. Please advise.
# Question's attempt (reported as not working): build one averaged geom_col
# chart per categorical/numeric column pair of `mpg` and save each as a PDF.
library(tidyverse)
library(skimr)
# Names of character columns with at most 16 distinct values, per skim().
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable)
# Names of numeric columns, per skim().
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)
# One row per (y = categorical, x = numeric) combination.
vars <- cross_df(list(y= cat_vars, x = num_vars))
# Stack one copy of mpg per combination, nest each copy, and attach the
# matching x/y names so every row carries its own data and column pair.
plots <- bind_rows(rep(list(mpg), nrow(vars)), .id = "grp") %>%
nest_by(grp) %>%
bind_cols(vars) %>%
rowwise() %>%
# NOTE(review): the parentheses in this mutate() call are unbalanced -- the
# `~(` group is closed at `ungroup())`, which leaves one `)` too many after
# theme_classic(). The snippet does not parse as written.
mutate(plot = list(~(data %>%
group_by(y) %>%
# NOTE(review): `"{x}" =` uses `=` rather than `:=`, so the name is not
# interpolated and the summary column is literally called `{x}`.
summarise("{x}" = mean(.data[[x]], na.rm = T)) %>%
ungroup()) %>%
ggplot() +
geom_col(aes(x = .data[[x]],
y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
fill = .data[[y]]), width = 0.8) +
xlab(paste0("Avg. ", x)) +
ylab(y) +
theme_classic()))),
filename = paste0(x, "_by_", y, ".pdf")) %>%
select(filename, plot)
# Intended to call ggsave() once per row, using the filename and plot columns.
pwalk(plots, ggsave, path = getwd())
There is no need to bind_rows, nest, ... Instead:
Put your plotting code in a helper function instead of wrapping everything in a pipeline. This allows for much easier debugging as well as cleaner and clearer code.
Use map2 to loop over the columns of your df vars
One issue with your code was summarise("{x}" = mean(.data[[x]], na.rm = T)) which will create a variable named {x}. Instead use e.g. !!sym(x) := ... to assign the value back to x if x is a string.
library(tidyverse)
library(skimr)

# Summarise mpg once and reuse the result for both column lists.
mpg_summary <- as.data.frame(skim(mpg))

# Names of character columns with at most 16 distinct values.
cat_vars <- mpg_summary %>%
  filter(skim_type == "character", character.n_unique <= 16) %>%
  pull(skim_variable)

# Names of numeric columns.
num_vars <- mpg_summary %>%
  filter(skim_type == "numeric") %>%
  pull(skim_variable)

# Every (x, y) pair, one per row. cross_df() is deprecated in purrr 1.0.0;
# expand_grid() with x listed first keeps the same row order (y varies
# fastest). Only the column order changes, and the columns are read by name.
vars <- expand_grid(x = num_vars, y = cat_vars)
#' Bar chart of the mean of a numeric column within each level of a
#' grouping column
#'
#' @param data A data frame.
#' @param x Name (string) of the numeric column to average.
#' @param y Name (string) of the grouping column.
#' @return A ggplot object.
make_plot <- function(data, x, y) {
  # Average `x` within each level of `y`, writing the result back to a
  # column named after `x`. `.groups = "drop"` already returns an ungrouped
  # result, so no separate ungroup() is needed.
  data <- data %>%
    group_by(across(all_of(y))) %>%
    summarise(!!sym(x) := mean(.data[[x]], na.rm = TRUE), .groups = "drop")

  ggplot(data) +
    geom_col(
      aes(
        x = .data[[x]],
        # Order the bars by the averaged value.
        y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = TRUE),
        fill = .data[[y]]
      ),
      width = 0.8
    ) +
    xlab(paste0("Avg. ", x)) +
    ylab(y) +
    theme_classic()
}
# One plot per row of `vars`, always drawn from mpg.
plots <- map2(vars$x, vars$y, function(x, y) make_plot(mpg, x, y))
length(plots)
#> [1] 25
plots[[1]]

# Export: one PDF per plot, named "<x>_by_<y>.pdf", in the working directory.
walk2(
  paste0(vars$x, "_by_", vars$y, ".pdf"),
  plots,
  function(file, p) ggsave(filename = file, plot = p, path = getwd())
)
I've put together a plot to view groups separately but now want to include significance levels for mean pairwise comparison in the plot. While I can do the comparison outside of the plot I'm wondering what the most efficient way of including the comparison in the plot would be?
Current Plot
library(tidyverse)

# Random 10,000-row sample of diamonds, restricted to three clarity grades.
dsub <- diamonds[sample(nrow(diamonds), 10000), ] %>%
  filter(clarity %in% c('VS2', 'VS1', 'VVS2'))

# Boxplot of carat per cut, split by clarity, with the individual points
# jittered and dodged on top of their boxes.
ggplot(dsub, aes(x = cut, y = carat, fill = clarity)) +
  geom_boxplot(outlier.size = 0) +
  geom_point(shape = 21, position = position_jitterdodge())
Now I want to add the comparisons within each level of the cut variable between all levels of the clarity variable. I prefer using ggpubr but couldn't see where this could be achieved.
EDITED to take OP preference for output into account
Ahhhh... okay well let me at least save you a bunch of vertical space and neaten things up by overcoming the fact that rstatix doesn't honor the order of your factors and ggpubr wants its groups as character not factor.
library(ggplot2)
library(dplyr)

# Random sample limited to three clarity grades; unused factor levels are
# dropped afterwards.
dsub <- diamonds[sample(nrow(diamonds), 10000), ] %>%
  filter(clarity %in% c('VS2', 'VS1', 'VVS2')) %>%
  droplevels()

# Pairwise Wilcoxon tests between clarity grades within each cut. group1 is
# temporarily made an ordered factor so the rows can be sorted in the wanted
# grade order, then converted back to character before the bracket
# positions are added.
dsub_stats <- dsub %>%
  group_by(cut) %>%
  rstatix::wilcox_test(carat ~ clarity) %>%
  mutate(group1 = ordered(group1, levels = c("VS2", "VS1", "VVS2"))) %>%
  arrange(cut, group1) %>%
  mutate(group1 = as.character(group1)) %>%
  rstatix::add_xy_position(x = 'cut')

ggpubr::ggboxplot(
  dsub,
  x = "cut",
  y = "carat",
  color = "clarity",
  add = 'jitter'
) +
  ggpubr::stat_pvalue_manual(
    dsub_stats,
    label = "p.adj.signif",
    tip.length = 0.01
  )
Created on 2020-09-24 by the reprex package (v0.3.0)
library(tidyverse)
library(rstatix)
library(ggpubr)

# Random sample of diamonds limited to three clarity grades.
dsub <- diamonds[sample(nrow(diamonds), 10000), ] %>%
  filter(clarity %in% c('VS2', 'VS1', 'VVS2'))

# Pairwise Wilcoxon tests between clarity grades within each cut, with the
# bracket positions for plotting along the cut axis added.
dsub_stats <- add_xy_position(
  wilcox_test(group_by(dsub, cut), carat ~ clarity),
  x = 'cut'
)

# Boxplot with jittered points, annotated with adjusted significance labels.
ggboxplot(dsub, x = "cut", y = "carat", color = "clarity", add = 'jitter') +
  stat_pvalue_manual(dsub_stats, label = "p.adj.signif", tip.length = 0.01)
# Country-level COVID-19 report. The URL is assembled with paste0() so it
# stays one unbroken string: a line break inside the quotes would become
# part of the address.
df <- read.csv(
  paste0(
    "https://raw.githubusercontent.com/ulklc/covid19-timeseries/",
    "master/countryReport/raw/rawReport.csv"
  ),
  stringsAsFactors = FALSE
)

# Totals per region. subset(df) without a condition returned df unchanged,
# so df is passed directly.
df6 <- aggregate(recovered ~ region, df, sum)
df4 <- aggregate(death ~ region, df, sum)
How can I show the df6 and df4 data as separate lines on the same line chart?
Here's a base R approach:
# Total recovered and death counts per region, in a single table.
plotdf <- aggregate(cbind(recovered, death) ~ region, data = df, FUN = sum)

# Label the rows by region, then keep only the two count columns as a matrix.
rownames(plotdf) <- plotdf$region
plotdf <- as.matrix(plotdf[, c("recovered", "death")])

# Transposed so that each region forms one group of side-by-side bars.
barplot(t(plotdf), beside = TRUE, col = c("green", "red"))
legend("topleft", c("Recovered", "Died"), fill = c("green", "red"))
And here's the (better?) "tidyverse" way:
library(dplyr)
library(tidyr)
library(ggplot2)

# Sum both counts per region, reshape to one row per region/status, and
# draw the two statuses as side-by-side bars.
df %>%
  group_by(region) %>%
  summarize(
    recovered = sum(recovered),
    death = sum(death)
  ) %>%
  pivot_longer(cols = -region) %>%
  ggplot(aes(x = region, y = value, fill = name)) +
  geom_col(position = "dodge") +
  labs(x = "Region", y = "Number", fill = "Status")
I'm trying to make a function that uses ggplot2 inside, together with aes_string and reorder, but with no luck so far.
Basically if we have a sample dataset like the following:
library(ggplot2)
library(dplyr)

# Fixed seed so the sampled `b` column is reproducible.
set.seed(123)

# Small example table: `id` is the filtering variable, `a` a factor, `b` a
# random draw of five distinct integers from 1 to 10.
dt <- tibble(
  id = c(1, 1, 1, 2, 2),
  a = factor(c("b", "d", "c", "a", "b")),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  b = sample(1:10, 5, replace = FALSE),
  cat = c(1, 1, 2, 2, 2)
)
I want the function to accept the following arguments: the dataset, a filtering variable, and two variables for plotting.
This is what I managed to do:
# Question's attempt: keep the rows of `df` whose id equals `filtval`, then
# draw a point plot of var2 against var1 (both given as column-name strings).
myplot <- function(df, filtval, var1, var2) {
data <- df %>% filter(id == filtval)
ggplot(data) +
geom_point(
aes_string(
# NOTE(review): reorder() is called here on the strings var1 and var2
# themselves, not on the columns they name, so it has no numeric values to
# order by.
x = reorder(var1, var2),
y = var2)
)
}
Unfortunately, when I run it, it returns the following warning:
myplot(dt, 1, "a", "b")
Warning message:
In mean.default(X[[i]], ...) :
argument is not numeric or logical: returning NA
This is what I want the function to do:
# Target output: bars for id 1, with `a` ordered by decreasing `b`.
data <- filter(dt, id == 1)
ggplot(data, aes(x = reorder(a, -b), y = b)) +
  geom_col()
With the latest version of ggplot2, you should use aes() with !! and sym() to turn your strings into symbols.
# Point plot of column `var2` against column `var1` (names given as
# strings), with `var1` reordered by `var2`, for rows where id == filtval.
myplot <- function(df, filtval, var1, var2) {
  # Turn the column-name strings into symbols once, up front.
  x_col <- sym(var1)
  y_col <- sym(var2)

  rows <- filter(df, id == filtval)

  ggplot(rows) +
    geom_point(aes(x = reorder(!!x_col, !!y_col), y = !!y_col))
}
After discussing with Mr Flick (see below), this should NOT be used:
# Variant that stores the reordered factor as a real column first, so that
# it can be referred to by name in aes_string().
myplot <- function(df, filtval, var1, var2) {
  data <- filter(df, id == filtval)
  data[["new_order"]] <- reorder(data[[var1]], data[[var2]])

  ggplot(data) +
    geom_point(mapping = aes_string(x = "new_order", y = var2))
}
Take his solution instead :)
I'm working on this df:
# Packages loaded for the reshaping and plotting code in this example.
library("ggplot2")
library("reshape2")
library("tidyr")
library("scales")
library("dplyr")
# Series labels, one per row of the final data frame.
Col0 <- c("AA", "BB", "CC", "DD","EE","FF")
# One vector of six values per day. The variable names encode the date as
# D<dd><mm><yyyy>; they are parsed later with format "D%d%m%Y".
# The first day has a missing value (NA) for the last series.
D01012015 <- c(2,2,2,6,1,NA)
D02012015 <- c(2,2,2,1,3,1)
D03012015 <- c(2,2,3,4,6,4)
D04012015 <- c(2,2,3,1,2,4)
D05012015 <- c(2,1,1,1,1,0)
D06012015 <- c(2,4,2,5,4,9)
D07012015 <- c(2,4,2,5,4,1)
D08012015 <- c(2,2,3,4,5,3)
D09012015 <- c(1,3,3,2,2,1)
D10012015 <- c(1,3,3,2,2,1)
D11012015 <- c(1,3,3,2,4,1)
D12012015 <- c(1,3,3,4,2,1)
D13012015 <- c(1,3,5,2,2,1)
D14012015 <- c(1,3,3,7,2,1)
D15012015 <- c(1,3,3,7,2,7)
# Wide table: the label column followed by one column per day.
df<-data.frame(Col0,D01012015,D02012015,D03012015,D04012015,D05012015,D06012015,D07012015,D08012015,D09012015,D10012015,D11012015,
D12012015,D13012015,D14012015,D15012015)
I know that normally, if i'd like to print a value per week on the x axis i should create this ggplot function:
# Long format: one row per (Col0, day column) pair.
f <- melt(df, id = c("Col0"))

# The former column names (D<dd><mm><yyyy>) are parsed into real dates.
f[["date"]] <- as.Date(f$variable, format = "D%d%m%Y")

# One line per Col0 value, with an x-axis break every week.
pl <- ggplot(f, aes(x = date, y = value, fill = Col0)) +
  geom_line(aes(colour = Col0, group = Col0)) +
  scale_x_date(breaks = date_breaks("1 week"))
My problem is that i have to create the same x axis values, using this function:
# Line plot of every value column of `data`, with one facet row and one
# colour per value of the first column.
plotfun <- function(data) {
  xval <- "dates"
  column <- names(data)[1]

  # Every column except the first is gathered into key/value pairs; the
  # key column is named by `xval`, the value column "Val".
  value_cols <- select_vars_(names(data), names(data), exclude = column)
  long_data <- gather_(data, xval, "Val", value_cols)

  ggplot(long_data, aes_string(xval, "Val", group = column, col = column)) +
    facet_grid(as.formula(paste(column, "~."))) +
    geom_line()
}

plotfun(df)
I don't know how to convert the x values to dates after gather, or how to show only one axis label per week as in the previous ggplot call.
Can you not just put in a mutate statement?
#' Line plot of every value column against its date, one facet per series
#'
#' @param data A data frame whose first column labels the series and whose
#'   remaining columns are named `D<dd><mm><yyyy>`.
#' @return A ggplot object.
plotfun <- function(data) {
  xval <- "dates"
  column <- names(data)[1]

  data %>%
    # Every column except the first goes to long format; the former column
    # names land in the column named by `xval`.
    pivot_longer(
      cols = -all_of(column),
      names_to = xval,
      values_to = "Val"
    ) %>%
    # Parse the dates from the reshaped data itself instead of reaching for
    # a global `f` that may not exist or may not match `data`.
    mutate(!!xval := as.Date(.data[[xval]], format = "D%d%m%Y")) %>%
    ggplot(
      aes(
        x = .data[[xval]],
        y = .data[["Val"]],
        group = .data[[column]],
        colour = .data[[column]]
      )
    ) +
    facet_grid(as.formula(paste(column, "~."))) +
    # For one axis break per week, as in the earlier plot, add
    # scale_x_date(breaks = date_breaks("1 week")) here.
    geom_line()
}

plotfun(df)