two-panel scatter plot in ggplot2

two-panel scatter plot in ggplot2 - r

For my data.frame full below, I'm wondering how to create a two-panel geom_point such that on the first panel, we have ols.(Intercept) (x-axis) plotted against hlm.(Intercept), AND on the second panel, we have ols.ses (x-axis) plotted against hlm.ses?
library(lme4)
library(tidyverse)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, hlm=mlm, sch.id=ch)
head(full, n = 1)
ols.(Intercept) ols.ses hlm.(Intercept) hlm.ses sch.id
1224 10.80513 2.508582 11.06002 2.504083 1224

One approach to achieve this is by making two separate plots and glue them together using e.g. patchwork:
library(lme4)
library(tidyverse)
library(patchwork)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, mlm=mlm, sch.id=ch)
p1 <- ggplot(full, aes(`ols.(Intercept)`, `mlm.(Intercept)`)) +
geom_point()
p2 <- ggplot(full, aes(ols.ses, mlm.ses)) +
geom_point()
p1 + p2
And as a second approach with some data wrangling one can achieve a similar plot using facet_wrap:
library(lme4)
#> Loading required package: Matrix
library(tidyverse)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, mlm=mlm, sch.id=ch)
full %>%
pivot_longer(- sch.id, names_to = "var", values_to = "value") %>%
separate(var, into = c("var1", "category"), sep = "\\.") %>%
pivot_wider(names_from = var1, values_from = value) %>%
ggplot(aes(ols, mlm)) +
geom_point() +
facet_wrap(~ category)

An option with facets. The solution from #stefan was really nice and quick. You could set an entire data pipeline by smartly separating your strings and then after reshaping you can have the desired variables in a format to be plotted using facet_wrap(). Here the code:
library(tidyverse)
#Plot
full %>% select(-sch.id) %>% pivot_longer(everything()) %>%
separate(name,c('V1','V2'),sep='\\.') %>%
arrange(V2,V1) %>%
group_by(V2,V1) %>% mutate(id=row_number()) %>%
pivot_wider(names_from = V1,values_from=value) %>% ungroup() %>%
select(-id) %>%
ggplot(aes(x=ols,y=mlm))+
geom_point()+
facet_wrap(.~V2,nrow = 1,scales = 'free')
Output:

Similar to the answer using patchwork, you can plot them as two separate ggplot() graphs and then put them side-by-side with the plot_grid() function from the cowplot package.
https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html

Related

Combine multiple gt tables to single one plot

With code below (edited basing on code from here) I generates two example tables with gt package:
library(tidyverse)
library(patchwork)
library(gt)
p1 <- mtcars %>%
head(5) %>%
gt()
p2 <- mtcars %>%
tail(5) %>%
gt()
# using wrap elements because this seems to be the answer to non-ggplot grobs e.g. #164
wrap_elements(full = p1 | p2)
grid.arrange(p1, p2, ncol=2, top="Main Title")
Out:
Error in p1 | p2 :
operations are possible only for numeric, logical or complex types
I hope to combine them into one as for ggplot objects: p <- (p1 | p2) using patchwork package, but I didn't find an effective answer yet.
I also try to convert it to ggplot using as_ggplot() function:
library(bstfun)
mtcars %>%
head(5) %>%
gt() %>%
as_ggplot()
But it raises an error:
Error: '.assert_package' is not an exported object from 'namespace:broom.helpers'
Is it possible to do so? Thanks for your help at advance.
Reference:
R - combine two gt objects in a single page output

I can offer to you this solution:
1. We take your data:
p1 <- mtcars %>%
head(5) %>%
gt()
p2 <- mtcars %>%
tail(5) %>%
gt()
2. Let's save your tables into .png's:
p1 %>%
gtsave("p11.png", path = "Your_working_dir")
p2 %>%
gtsave("p12.png", path = "Your_working_dir")
3. Let's combine your tables:
library(cowplot)
p111 <- ggdraw() + draw_image("p11.png", scale = 0.8)
p112 <- ggdraw() + draw_image("p12.png", scale = 0.8)
plot_grid(p111, p112)
Our result:

Using geom_smooth for fitting a glm to fractions

This post is somewhat related to this post.
Here I have xy grouped data where y are fractions:
library(dplyr)
library(ggplot2)
library(ggpmisc)
set.seed(1)
df1 <- data.frame(value = c(0.8,0.5,0.4,0.2,0.5,0.6,0.5,0.48,0.52),
age = rep(c("d2","d4","d45"),3),
group = c("A","A","A","B","B","B","C","C","C")) %>%
dplyr::mutate(time = as.integer(age)) %>%
dplyr::arrange(group,time) %>%
dplyr::mutate(group_age=paste0(group,"_",age))
df1$group_age <- factor(df1$group_age,levels=unique(df1$group_age))
What I'm trying to achieve is to plot df1 as a bar plot, like this:
ggplot(df1,aes(x=group_age,y=value,fill=age)) +
geom_bar(stat='identity')
But I want to fit to each group a binomial glm with a logit link function, which estimates how these fractions are affected by time.
Let's say I have 100 observations per each age (time) in each group:
df2 <- do.call(rbind,lapply(1:nrow(df1),function(i){
data.frame(age=df1$age[i],group=df1$group[i],time=df1$time[i],group_age=df1$group_age[i],value=c(rep(T,100*df1$value[i]),rep(F,100*(1-df1$value[i]))))
}))
Then the glm for each group (e.g., group A) is:
glm(value ~ time, dplyr::filter(df2, group == "A"), family = binomial(link='logit'))
So I would like to add to the plot above the estimated regression slopes for each group along with their corresponding p-values (similar to what I'm doing for the continuous df$value in this post).
I thought that using:
ggplot(df1,aes(x=group_age,y=value,fill=age)) +
geom_bar(stat='identity') +
geom_smooth(data=df2,mapping=aes(x=group_age,y=value,group=group),color="black",method='glm',method.args=list(family=binomial(link='logit')),size=1,se=T) +
stat_poly_eq(aes(label=stat(p.value.label)),formula=my_formula,parse=T,npcx="center",npcy="bottom") +
scale_x_log10(name="Age",labels=levels(df$age),breaks=1:length(levels(df$age))) +
facet_wrap(~group) + theme_minimal()
Would work but I get the error:
Error in Math.factor(x, base) : ‘log’ not meaningful for factors
Any idea how to get it right?

I believe this could help:
library(tidyverse)
library(broom)
df2$value <- as.numeric(df2$value)
#Estimate coefs
dfmodel <- df2 %>% group_by(group) %>%
do(fitmodel = glm(value ~ time, data = .,family = binomial(link='logit')))
#Extract coeffs
dfCoef = tidy(dfmodel, fitmodel)
#Create labels
dfCoef %>% filter(term=='(Intercept)') %>% mutate(Label=paste0(round(estimate,3),'(p=',round(p.value,3),')'),
group_age=paste0(group,'_','d4')) %>%
select(c(group,Label,group_age)) -> Labels
#Values
df2 %>% group_by(group,group_age) %>% summarise(value=sum(value)) %>% ungroup() %>%
group_by(group) %>% filter(value==max(value)) %>% select(-group_age) -> values
#Combine
Labels %>% left_join(values) -> Labels
Labels %>% mutate(age=NA) -> Labels
#Plot
ggplot(df2,aes(x=group_age,y=value,fill=age)) +
geom_text(data=Labels,aes(x=group_age,y=value,label=Label),fontface='bold')+
geom_bar(stat='identity')+
facet_wrap(.~group,scales='free')

Thanks to Pedro Aphalo this is nearly a complete solution:
Generate the data.frame with the fractions (here use time as an integer by deleting "d" in age rather than using time as the levels of age):
library(dplyr)
library(ggplot2)
library(ggpmisc)
set.seed(1)
df1 <- data.frame(value = c(0.8,0.5,0.4,0.2,0.5,0.6,0.5,0.48,0.52),
age = rep(c("d2","d4","d45"),3),
group = c("A","A","A","B","B","B","C","C","C")) %>%
dplyr::mutate(time = as.integer(gsub("d","",age))) %>%
dplyr::arrange(group,time) %>%
dplyr::mutate(group_age=paste0(group,"_",age))
df1$group_age <- factor(df1$group_age,levels=unique(df1$group_age))
Inflate df1 to 100 observations per each age in each group but specify value as an integer rather than a binary:
df2 <- do.call(rbind,lapply(1:nrow(df1),function(i){
data.frame(age=df1$age[i],group=df1$group[i],time=df1$time[i],group_age=df1$group_age[i],value=c(rep(1,100*df1$value[i]),rep(0,100*(1-df1$value[i]))))
}))
And now plot it using geom_smooth and stat_fit_tidy:
ggplot(df1,aes(x=time,y=value,group=group,fill=age)) +
geom_bar(stat='identity') +
geom_smooth(data=df2,mapping=aes(x=time,y=value,group=group),color="black",method='glm',method.args=list(family=binomial(link='logit'))) +
stat_fit_tidy(data=df2,mapping=aes(x=time,y=value,group=group,label=sprintf("P = %.3g",stat(x_p.value))),method='glm',method.args=list(formula=y~x,family=binomial(link='logit')),parse=T,label.x="center",label.y="top") +
scale_x_log10(name="Age",labels=levels(df2$age),breaks=unique(df2$time)) +
facet_wrap(~group) + theme_minimal()
Which gives (note that the scale_x_log10 is mainly a cosmetic approach to presenting the x-axis as time rather than levels of age):
The only imperfection is that the p-values seem to appear messed up.

Simulation with Tidyverse -- putting data into tibble format

I am trying to run simulations in R using the tidyverse. This code works, but doesn't scale well to more than a few variables.
Any thoughts on how to improve this? I've tried purrr but I didn't find any success.
The example below draws 5 values from a normal distribution and repeats this 3 times. How could I repeat it n times instead of 3?
n = 5
x=1:n
y1 = rnorm(n)
y2 = rnorm(n)
y3 = rnorm(n)
# put data into tibble
df <- tibble(x=x, y1=y1, y2=y2, y3=y3)
# Tidy data -- go from wide to long
df <- pivot_longer(df, cols=starts_with('y'))
# Make plot
ggplot(df, aes(x=x, y=value, group=name, color=name))+
geom_line()

If we need to replicate, then
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
n <- 5
rpl <- 3
replicate(rpl, rnorm(n), simplify = FALSE) %>%
set_names(str_c('y', seq_along(.))) %>%
as_tibble %>%
mutate(x = row_number()) %>%
pivot_longer(cols = starts_with('y')) %>%
ggplot(aes(x=x, y=value, group=name, color=name))+
geom_line()

Maintain ggplot panel size while axis labels change length

How can I make the panels of separate ggplots align when the y-axis labels change in length across plots? Below I've saved two subsets of mtcars with longer and shorter model names. Although the overall plots are the same size, the panels are smaller in the mt_long plot because the y-axis labels take up more of the plot.
library(dplyr)
library(ggplot2)
ds_mt <- mtcars %>% rownames_to_column("model")
mt_short <- ds_mt %>% arrange(nchar(model)) %>% slice(1:4)
mt_long <- ds_mt %>% arrange(-nchar(model)) %>% slice(1:4)
plot_short <-
mt_short %>%
ggplot(aes(x = model, y = mpg)) +
geom_col() +
coord_flip()
plot_long <-
mt_long %>%
ggplot(aes(x = model, y = mpg)) +
geom_col() +
coord_flip()
plot_short
plot_long
For this reprex, it is important that the plots be separate. Is there any way to set just the panel dimensions of the plot rather than the overall size of the plot?

We can use gridarrange from the egg package
library(egg)
ggarrange(plot_short, plot_long, ncol = 1)
To save, use
gg <- ggarrange(plot_short, plot_long, ncol = 1)
ggsave("file.png", gg)

try egg::set_panel_size(plot_short)

Plot MNIST digits with ggplot2

I want to plot the MNIST digits using ggplot2.
I tried this but I'm getting the numbers rotated 90 degrees. The code below is to plot the 2nd number in the dataset which corresponds to a 2.
trainData = read.csv(file = url("https://drive.google.com/uc?export=download&id=0B4Tqe9kUUfrBSllGY29pWmdGQUE"))
df = expand.grid(y = 0:27, x = 0:27)
df$col = unlist(trainData[2, -c(1,2)])
ggplot(df, aes(x, y)) + geom_tile(aes(fill = col))
If possible, please consider in your solution that I plan expand this to plotting a matrix of numbers using facet_grid or facet_wrap. I want to end with a function that I will pass a vector of rows and the function will get those rows from the dataset and create a matrix of plots (one for each number).
Thanks!

mnist is a build-in dataset in keras package.
Here is one example plot with ggplot2 and tidyverse functions:
To make geom_tile work, we need to transform the data a bit.
library(keras)
library(dplyr)
library(tibble)
library(tidyr)
library(stringr)
mnist <- keras::dataset_mnist()
mnist$test$x[sample(1:100,1), 1:28, 1:28] %>%
as_data_frame() %>%
rownames_to_column(var = 'y') %>%
gather(x, val, V1:V28) %>%
mutate(x = str_replace(x, 'V', '')) %>%
mutate(x = as.numeric(x),
y = as.numeric(y)) %>%
mutate(y = 28-y) %>%
ggplot(aes(x, y))+
geom_tile(aes(fill = val+1))+
coord_fixed()+
theme_void()+
theme(legend.position="none")