How to subset data in a ggplot panel chart? - r

I am trying create a panel chart in ggplot with four variables which all have their own scale for the y axis. I can get the structure of the panel chart to work but am having trouble actually getting each data set onto the gird. I have been following a script I found online. See below however I am getting the following error when I try and use the subset function further down in the script.
Error in .(variable == "Count") : could not find function "."
#load data
#Data source: data analysis-gullies > R Stats Input > Panel Chart
df <- read.csv(file.choose(), header = T)
View(df)
#load library
library(ggplot2)
library(reshape2)
dfm <- melt(df, id.vars =c("Interval"))
View(dfm)
test <- ggplot(dfm, aes(Interval, value, ymin = 0,
ymax = value, colour = "grey20"))+ scale_colour_identity() +
xlim(5,1115)+ facet_grid(variable ~ ., scales = "free", as.table = FALSE)+
theme_bw() + theme(panel.spacing = unit(0, "lines"), axis.title.x = element_blank(),
axis.title.y = element_text())
test
test1 <- test + geom_col(subset = .(variable == "Count"))
test2 <- test1 + geom_col(subset = .(variable == "Length"))
test3 <- test2 + geom_col(subset = .(variable == "Area"))
test4 <- test3 + geom_col(subset = .(variable == "Volume"))

You can use the patchwork package to merge individual ggplot2 objects to get individual axes for each panel:
library(tidyverse)
library(patchwork)
iris %>%
nest(-Species) %>%
mutate(
plt = data %>% map2(Species, ~ {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_point() +
labs(title = .y)
})
) %>%
pull(plt) %>%
wrap_plots()
You can also add logic to plot different plots per panel:
library(tidyverse)
library(patchwork)
iris %>%
nest(-Species) %>%
mutate(
plt = data %>% map2(Species, ~ {
if(.y == "setosa") {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_point() +
labs(title = .y)
} else {
.x %>%
ggplot(aes(Sepal.Width, Sepal.Length)) +
geom_line() +
labs(title = .y)
}
})
) %>%
pull(plt) %>%
wrap_plots()
If the panel plots are very different from each other (e.g. different variables for the x and y axes), it is recommended to create each plot individually and then call wrap_plots of all the plot objects:
plt1 <- qplot(Sepal.Length, Sepal.Width, data = iris, geom = "point")
plt2 <- qplot(Petal.Length, Petal.Width, data = iris, geom = "line")
wrap_plots(plt1, plt2, nrow = 1)

Related

How to automatically position multiple model evaluation parameters in facetted ggplot2?

I am trying to automatically position multiple model evaluation parameters in facetted ggplot. This answer has helped me to put the R2 and RMSE in facetted ggplot automatically using the following code
library(caret)
library(tidyverse)
summ <- iris %>%
group_by(Species) %>%
summarise(Rsq = R2(Sepal.Length, Petal.Length),
RMSE = RMSE(Sepal.Length, Petal.Length)) %>%
mutate_if(is.numeric, round, digits=2)
p <- ggplot(data=iris, aes(x = Sepal.Length, y = Petal.Length)) +
geom_point(color="blue",alpha = 1/3) +
facet_wrap(Species ~ ., scales="free") +
geom_smooth(method=lm, fill="black", formula = y ~ x) +
xlab("Sepal Length") +
ylab("Petal Length") + theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Here we create our annotations data frame.
df.annotations <- data.frame()
# Rsq
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("Rsq", summ$Rsq,
sep = " = ")))
# RMSE
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("RMSE", summ$RMSE,
sep = " = ")))
# This here is important, especially naming the first column
# Species
colnames(df.annotations) <- c("Species", "label")
vertical_adjustment = ifelse(grepl("Rsq",df.annotations$label),1.5,3)
p + geom_text(data=df.annotations,aes(x=-Inf,y=+Inf,label=label),
hjust = -0.1, vjust = vertical_adjustment, size=3.5)
I have calculated NSE using hydroGOF package like
library(hydroGOF)
summ <- iris %>%
group_by(Species) %>%
summarise(Rsq = R2(Sepal.Length, Petal.Length),
RMSE = RMSE(Sepal.Length, Petal.Length),
NSE = NSE(Sepal.Length, Petal.Length)) %>%
mutate_if(is.numeric, round, digits=2)
Added the NSE to the df.annotations dataframe like
# NSE
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("NSE", summ$NSE,
sep = " = ")))
Now, how can I place multiple model evaluation parameters in facetted ggplot2?
This is the new part using case_when from dplyr package for aligning vertically:
library(dplyr)
vertical_adjustment = case_when(grepl("Rsq",df.annotations$label) ~ 1.5,
grepl("RMSE",df.annotations$label) ~ 3,
grepl("NSE",df.annotations$label) ~ 4.5)
p + geom_text(data=df.annotations,aes(x=-Inf,y=+Inf,label=label),
hjust = -0.1, vjust = vertical_adjustment, size=3.5)
The whole code:
library(caret)
library(tidyverse)
library(hydroGOF)
summ <- iris %>%
group_by(Species) %>%
summarise(Rsq = R2(Sepal.Length, Petal.Length),
RMSE = RMSE(Sepal.Length, Petal.Length),
NSE = NSE(Sepal.Length, Petal.Length)) %>%
mutate_if(is.numeric, round, digits=2)
p <- ggplot(data=iris, aes(x = Sepal.Length, y = Petal.Length)) +
geom_point(color="blue",alpha = 1/3) +
facet_wrap(Species ~ ., scales="free") +
geom_smooth(method=lm, fill="black", formula = y ~ x) +
xlab("Sepal Length") +
ylab("Petal Length") + theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p
# Here we create our annotations data frame.
df.annotations <- data.frame()
# Rsq
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("Rsq", summ$Rsq,
sep = " = ")))
# RMSE
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("RMSE", summ$RMSE,
sep = " = ")))
df.annotations <- rbind(df.annotations,
cbind(as.character(summ$Species),
paste("NSE", summ$NSE,
sep = " = ")))
# This here is important, especially naming the first column
# Species
colnames(df.annotations) <- c("Species", "label")
library(dplyr)
vertical_adjustment = case_when(grepl("Rsq",df.annotations$label) ~ 1.5,
grepl("RMSE",df.annotations$label) ~ 3,
grepl("NSE",df.annotations$label) ~ 4.5)
p + geom_text(data=df.annotations,aes(x=-Inf,y=+Inf,label=label),
hjust = -0.1, vjust = vertical_adjustment, size=3.5)

Incorrect p-value position on ggplots using rstatix

I am having trouble placing the p-values in the correct position on the y axis of a ggplot using rstatix. I can get the example provided on the package author's blog to work fine, but when I change the values, the positions are incorrect. Here is the working version:
library(tidyverse)
library(rstatix)
##Example provided by the package author which works correctly
df <- ToothGrowth%>%
as_tibble()
#Check df
df
#Stats calculation
stat.test <- df %>%
group_by(dose) %>%
t_test(len ~ supp) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Make facet and add p-values
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max")
#Check p value positions - y.position looks good
stat.test
#Plot
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
However, when I change the values, the position of the p values are too high.
## My example which plots incorrectly
##--- This is a very inelegant way to change the values!!
df <- ToothGrowth %>%
mutate(helper = paste0(supp, dose))
df$RecordingNo <- ave(seq.int(nrow(df)), df$helper, FUN = seq_along)
df <- df %>%
select(-helper) %>%
pivot_wider(names_from = c(dose), values_from = len) %>%
mutate(`0.5` = `0.5` * 0.1) %>%
mutate(`2` = `2` * 10) %>%
select(-RecordingNo) %>%
pivot_longer(-supp) %>%
rename(len = value, dose = name) %>%
mutate(dose = as_factor(dose)) %>%
as_tibble()
#Check df
df
##------
#This code is exactly the same as the working code above.
#Stats calculation
stat.test <- df %>%
group_by(dose) %>%
t_test(len ~ supp) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance()
# Make facet and add p-values
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max")
#Check p value positions - y.position looks incorrect
stat.test
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
I guess there is a difference in the second dataframe which is causing the problems, but I can't figure it out. Thanks!
Like the scales option on facet_wrap, there is a scales option on add_xy_position that controls the p value position . As I am using "facet_wrap(...,scales = "free")" I should use add_xy_position(...,scales = "free") to make sure the positions match.
In my example:
stat.test <- stat.test %>% add_xy_position(x = "supp", fun = "max",scales = "free")
ggplot(df, aes(x = supp, y = len)) +
geom_boxplot() +
geom_jitter() +
facet_wrap( ~ dose, scales = "free") +
stat_pvalue_manual(stat.test, hide.ns = F,
label = "{p.adj}")
Answer from author's Github page.

ggplot2 to create multiple plots but without a facet_wrap

I want to plot four independent (in 2 rows and 2 columns) plots, each with a different flipped x axis. I have used ggplot with a face_wrap but this approach doesn't give each plot its own flipped x-axis labels.
Is there a way to achieve my goal in ggplot2?
library(tidyverse)
data <- read_csv('https://raw.githubusercontent.com/rnorouzian/e/master/surv.csv')
names(data)[2:5] <- c("Representation", "Solidification", "Application", "Confidence")
data %>%
pivot_longer(cols = -id) %>%
mutate(name = name,
value = str_wrap(value, 50)) %>%
ggplot() +
geom_bar(aes(value, fill = name), show.legend = FALSE) +
facet_wrap(.~name) +
coord_flip() +
labs(y = "Students", x = "") +
theme(axis.text.y = element_text(size=8))
Adding scales = 'free_y' to your original code.
Edit: add code to manually change order of the levels.
level_order <- c("Neutral",
"Agree",
"Strongly Agree",
"The assignment gave me a great opportunity to\napply what I learned",
"The assignment gave me an opportunity to apply\nwhat I learned",
"The assignment helped me solidify the key concepts",
"The assignment highly helped me solidify the key\nconcepts",
"The assignment highly reflected the class\ninstructions",
"The assignment reflected the class instructions",
"The assignment somewhat reflected the class\ninstructions")
data %>%
pivot_longer(cols = -id) %>%
mutate(name = name,
value = str_wrap(value, 50),
value = factor(value, levels = level_order)) %>%
ggplot() +
geom_bar(aes(value, fill = name), show.legend = FALSE) +
facet_wrap(.~name, scales = 'free_y') +
coord_flip() +
labs(y = "Students", x = "") +
theme(axis.text.y = element_text(size=8))
I would suggest next approach:
#Code
library(tidyverse)
data <- read_csv('https://raw.githubusercontent.com/rnorouzian/e/master/surv.csv')
names(data)[2:5] <- c("Representation", "Solidification", "Application", "Confidence")
The option 1 would be adjusting facet_wrap():
#Option 1
data %>%
pivot_longer(cols = -id) %>%
mutate(name = name,
value = str_wrap(value, 50)) %>%
ggplot() +
geom_bar(aes(value, fill = name), show.legend = FALSE) +
facet_wrap(.~name,scales='free') +
labs(y = "Students", x = "") +
theme(axis.text.y = element_text(size=8))
And the second option would be create a list and a function for the plots:
#Option 2
data2 <- data %>%
pivot_longer(cols = -id) %>%
mutate(name = name,
value = str_wrap(value, 50))
#Split
LData2 <- split(data2,data2$name)
#Now function to plot
myfun <- function(x)
{
mplot <- ggplot(x,aes(value, fill = name)) +
geom_bar(show.legend = FALSE) +
labs(y = "Students", x = "") +
theme(axis.text.y = element_text(size=8))+
ggtitle(unique(x$name))
return(mplot)
}
#Apply
Lplots <- lapply(LData2,myfun)
Some example output:
#Example
Lplots[[1]]
Or using patchwork with wrap_plots() in the list:
library(patchwork)
#Code
wrap_plots(Lplots)
Output:
Or changing the axis orientation and using patchwork:
#Now function to plot 2
myfun <- function(x)
{
mplot <- ggplot(x,aes(value, fill = name)) +
geom_bar(show.legend = FALSE) +
labs(y = "Students", x = "") +
coord_flip()+
theme(axis.text.y = element_text(size=8))+
ggtitle(unique(x$name))
return(mplot)
}
#Apply
Lplots <- lapply(LData2,myfun)
#Wrap
wrap_plots(Lplots)
Output:

ggplot - plotting bars and lines in the same chart

I need to generate a plot with bar graph for two variables and a line with the third variable.
I can create a column graph for one variable like below
df <- head(mtcars)
df$car <- row.names(df)
ggplot(df) + geom_col(aes(x=car, y=disp))
Ref this answer - I can plot two variables - disp and hp as below
library(tidyr)
df$car = row.names(df)
df_long = gather(df, key = var, value = value, disp, hp)
ggplot(df_long, aes(x = car, y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'dodge')
I need to have a third variable qsec plotted as a line like as in the below chart - how to go about this ?
You can try:
library(tidyverse)
# some data
data <- mtcars %>%
mutate(car = rownames(mtcars)) %>%
slice(1:6) %>%
select(car, disp, hp)
data %>%
gather(key, value, -car) %>%
group_by(car) %>%
mutate(qsec_value = median(value)) %>%
mutate(qsec = factor("qsec")) %>%
ggplot() +
geom_col(aes(x=car, y=value, fill = key), position = "dodge") +
geom_point(aes(x=car, y=qsec_value,color = qsec)) +
geom_line(aes(x=car, y=qsec_value, color = qsec, group =1)) +
scale_colour_manual(name= "", values = 1) +
theme(legend.position = "top",
legend.title = element_blank())
Less code, same result:
data %>%
pivot_longer(-1) %>%
ggplot(aes(x = car)) +
geom_col(aes(y=value, fill = name), position = "dodge") +
stat_summary(aes(y=value, group=1, color="qseq"), fun = "median", geom = "point")+
stat_summary(aes(y=value, group=1, color="qseq"), fun = "median", geom = "line")+
scale_colour_manual(name= "", values = 1)
You need another layer and because geom_line is for continuous data, you need to do as if your x-values are for the line-layer. By doing so, order of data becomes crucial, hence you have also to sort it:
gather(df, key = var, value = value, disp, hp, qsec) %>%
arrange(car) %>%
{
print(
ggplot() +
geom_bar(stat = 'identity', position = 'dodge', data = filter(., var != "qsec"), mapping = aes(x = car, y = value, fill = var)) +
geom_line(mapping = aes(x = 1:length(car), y = value), data = filter(., var == "qsec"))
)
}
Edit:
btw, you can check the correct order of qsec to the respective x-value by calling plotly::ggplotly(), then you can read the values better and compare them to the df, because they will show up if you point on the element...

R ggplot2: Adding another geom to coord_polar

I have a plot i wish to add another layer to
Th plot is below. I want to overlay another polar plot on it to see that the numbers "match up"
In the example below I have create the plot for one species of the iris dataset. I would like to overlay another plot of a different species
Thank you for your time
library(ggplot2)
library(dplyr)
mydf <- iris
plot.data <- tidyr::gather(mydf,key = attribute ,value = avg_score, Sepal.Length:Petal.Width)
plot.data <- plot.data %>%
filter(Species == 'setosa') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(plot.data, aes(x=attribute, y = attr_mean, col = attribute)) +
geom_bar(stat = "identity", fill = 'white') +
coord_polar(theta = "x") +
theme_bw()
This is quite the pedestrian way of doing things.
plot.setosa <- plot.data %>%
filter(Species == 'setosa') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
plot.virginica <- plot.data %>%
filter(Species == 'virginica') %>%
group_by(attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(plot.setosa, aes(x=attribute, y = attr_mean, col = attribute)) +
geom_bar(stat = "identity", fill = 'blue', alpha = 0.25) +
geom_bar(data = plot.virginica, stat = "identity", fill= "green", alpha = 0.25,
aes(x = attribute, y = attr_mean, col = attribute)) +
coord_polar(theta = "x") +
theme_bw()
And a slightly less pedestrian.
xy <- plot.data %>%
group_by(Species, attribute) %>%
summarise(attr_mean = mean(avg_score))
ggplot(xy, aes(x = attribute, y = attr_mean, color = attribute, fill = Species)) +
theme_bw() +
geom_bar(stat = "identity", alpha = 0.25) +
coord_polar(theta = "x")

Resources