Plot MNIST digits with ggplot2 - r

I want to plot the MNIST digits using ggplot2.
I tried this but I'm getting the numbers rotated 90 degrees. The code below is to plot the 2nd number in the dataset which corresponds to a 2.
trainData = read.csv(file = url("https://drive.google.com/uc?export=download&id=0B4Tqe9kUUfrBSllGY29pWmdGQUE"))
df = expand.grid(y = 0:27, x = 0:27)
df$col = unlist(trainData[2, -c(1,2)])
ggplot(df, aes(x, y)) + geom_tile(aes(fill = col))
If possible, please consider in your solution that I plan expand this to plotting a matrix of numbers using facet_grid or facet_wrap. I want to end with a function that I will pass a vector of rows and the function will get those rows from the dataset and create a matrix of plots (one for each number).
Thanks!

mnist is a build-in dataset in keras package.
Here is one example plot with ggplot2 and tidyverse functions:
To make geom_tile work, we need to transform the data a bit.
library(keras)
library(dplyr)
library(tibble)
library(tidyr)
library(stringr)
mnist <- keras::dataset_mnist()
mnist$test$x[sample(1:100,1), 1:28, 1:28] %>%
as_data_frame() %>%
rownames_to_column(var = 'y') %>%
gather(x, val, V1:V28) %>%
mutate(x = str_replace(x, 'V', '')) %>%
mutate(x = as.numeric(x),
y = as.numeric(y)) %>%
mutate(y = 28-y) %>%
ggplot(aes(x, y))+
geom_tile(aes(fill = val+1))+
coord_fixed()+
theme_void()+
theme(legend.position="none")

Related

ggplot2 version of shepard plot, i.e. vegan::stressplot()?

There seems to be quite a bit of information for plotting NMDS outputs (i.e. NMDS1 vs NMDS1) using ggplot2 however I cannot find a way to plot the vegan::stressplot() (shepard's plot) using ggplot2.
Is there a way to produce a ggplot2 version of a metaMDS output?
Reproducible code
library(vegan)
set.seed(2)
community_matrix = matrix(
sample(1:100,300,replace=T),nrow=10,
dimnames=list(paste("community",1:10,sep=""),paste("sp",1:30,sep="")))
example_NMDS=metaMDS(community_matrix, k=2)
stressplot(example_NMDS)
Created on 2021-09-17 by the reprex package (v2.0.1)
Here's a workaround to plot a very similar plot using ggplot2.The trick was to get the structure of the stressplot(example_NMDS) and extract the data stored in that object. I used the tidyverse package that includes ggplot and other packages such as tidyr that contains the pivot_longer function.
library(vegan)
library(tidyverse)
# Analyze the structure of the stressplot
# Notice there's an x, y and yf list
str(stressplot(example_NMDS))
# Create a tibble that contains the data from stressplot
df <- tibble(x = stressplot(example_NMDS)$x,
y = stressplot(example_NMDS)$y,
yf = stressplot(example_NMDS)$yf) %>%
# Change data to long format
pivot_longer(cols = c(y, yf),
names_to = "var")
# Create plot
df %>%
ggplot(aes(x = x,
y = value)) +
# Add points just for y values
geom_point(data = df %>%
filter(var == "y")) +
# Add line just for yf values
geom_step(data = df %>%
filter(var == "yf"),
col = "red",
direction = "vh") +
# Change axis labels
labs(x = "Observed Dissimilarity", y = "Ordination Distance") +
# Add bw theme
theme_bw()

Plot percentages in R as blocks

I have the table to the left
table <- cbind(c("x1","x2", "x3"), c("0.4173","0.9211","0.0109"))
and is trying to make the plot two the right.
Is there any packages in R, which can do, what I'm trying to achieve?
A base R, option would be to use barplot applied on a named vector
barplot(v1)
Or convert to two column data.frame with stack and use the formula method
barplot(values ~ ind, stack(v1))
Or we can can use tidyverse with ggplot
library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
enframe(v1, name = "id", value = 'block') %>%
mutate(non_block = 1 - block) %>%
pivot_longer(cols = -id) %>%
ggplot(aes(x = id, y = value, fill = name)) +
geom_col() +
coord_flip() +
theme_bw()
-output
data
v1 <- setNames(c(0.4173, 0.9211, 0.0109), paste0("x", 1:3))

two-panel scatter plot in ggplot2

For my data.frame full below, I'm wondering how to create a two-panel geom_point such that on the first panel, we have ols.(Intercept) (x-axis) plotted against hlm.(Intercept), AND on the second panel, we have ols.ses (x-axis) plotted against hlm.ses?
library(lme4)
library(tidyverse)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, hlm=mlm, sch.id=ch)
head(full, n = 1)
ols.(Intercept) ols.ses hlm.(Intercept) hlm.ses sch.id
1224 10.80513 2.508582 11.06002 2.504083 1224
One approach to achieve this is by making two separate plots and glue them together using e.g. patchwork:
library(lme4)
library(tidyverse)
library(patchwork)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, mlm=mlm, sch.id=ch)
p1 <- ggplot(full, aes(`ols.(Intercept)`, `mlm.(Intercept)`)) +
geom_point()
p2 <- ggplot(full, aes(ols.ses, mlm.ses)) +
geom_point()
p1 + p2
And as a second approach with some data wrangling one can achieve a similar plot using facet_wrap:
library(lme4)
#> Loading required package: Matrix
library(tidyverse)
hsb <- read.csv('https://raw.githubusercontent.com/rnorouzian/e/master/hsb.csv')
fit <- lmer(math~ses+(ses|sch.id), data= hsb)
ch <- unique(hsb$sch.id)
ols <- map_dfr(ch,~coef(lm(math~ses, data=hsb,subset=sch.id==.)))
mlm <- coef(fit)$sch
full <- cbind(ols=ols, mlm=mlm, sch.id=ch)
full %>%
pivot_longer(- sch.id, names_to = "var", values_to = "value") %>%
separate(var, into = c("var1", "category"), sep = "\\.") %>%
pivot_wider(names_from = var1, values_from = value) %>%
ggplot(aes(ols, mlm)) +
geom_point() +
facet_wrap(~ category)
An option with facets. The solution from #stefan was really nice and quick. You could set an entire data pipeline by smartly separating your strings and then after reshaping you can have the desired variables in a format to be plotted using facet_wrap(). Here the code:
library(tidyverse)
#Plot
full %>% select(-sch.id) %>% pivot_longer(everything()) %>%
separate(name,c('V1','V2'),sep='\\.') %>%
arrange(V2,V1) %>%
group_by(V2,V1) %>% mutate(id=row_number()) %>%
pivot_wider(names_from = V1,values_from=value) %>% ungroup() %>%
select(-id) %>%
ggplot(aes(x=ols,y=mlm))+
geom_point()+
facet_wrap(.~V2,nrow = 1,scales = 'free')
Output:
Similar to the answer using patchwork, you can plot them as two separate ggplot() graphs and then put them side-by-side with the plot_grid() function from the cowplot package.
https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html

R - ggplot2 geom_bar() doesn't plot correctly column's values

I am new to R
I would like plot using ggplot2's geom_bar():
top_r_cuisine <- r_cuisine %>%
group_by(Rcuisine) %>%
summarise(count = n()) %>%
arrange(desc(count)) %>%
top_n(10)
But when I try to plot this result by:
ggplot(top_r_cuisine, aes(x = Rcuisine)) +
geom_bar()
I get this:
which doesn't represent the values in top_r_cuisine. Why?
EDIT:
I have tried:
c_count=c(23,45,67,43,54)
country=c("america","india","germany","france","italy")
# sample Data frame #
finaldata = data.frame(country,c_count)
ggplot(finaldata, aes(x=country)) +
geom_bar(aes(weight = c_count))
you need to assign the weights in the geom_bar()

Using gganimate and ggplot for a boxplot: Cumulative not working

I'm trying to produce an animation for a simulation model, and I want to show how the distribution of results changes as the simulation runs.
I've seen gganimate used for scatter plots but not for boxplots (or ideally violin plots). Here I've provided a reprex.
When I use sim_category (which is a bucket for a certain number of simulation runs) I want the result to be cumulative of all previous runs to show the total distribution.
In this example (and my actual code), cumulative = TRUE does not do this. Why is this?
library(gganimate)
library(animation)
library(ggplot2)
df = as.data.frame(structure(list(ID = c(1,1,2,2,1,1,2,2,1,1,2,2),
value = c(10,15,5,10,7,17,4,12,9,20,6,17),
sim_category = c(1,1,1,1,2,2,2,2,3,3,3,3))))
df$ID <- factor(df$ID, levels = (unique(df$ID)))
df$sim_category <- factor(df$sim_category, levels = (unique(df$sim_category)))
ani.options(convert = shQuote('C:/Program Files/ImageMagick-7.0.5-Q16/magick.exe'))
p <- ggplot(df, aes(ID, value, frame= sim_category, cumulative = TRUE)) + geom_boxplot(position = "identity")
gganimate(p)
gganimate's cumulative doesn't accumulate the data, it just keeps gif frames in subsequent frames as they appear. To achieve what you want, you have to do the accumulation before building the plot, something along the following lines:
library(tidyverse)
library(gganimate)
df <- data_frame(
ID = factor(c(1,1,2,2,1,1,2,2,1,1,2,2), levels = 1:2),
value = c(10,15,5,10,7,17,4,12,9,20,6,17),
sim_category = factor(c(1,1,1,1,2,2,2,2,3,3,3,3), levels = 1:3)
)
p <- df %>%
pull(sim_category) %>%
levels() %>%
as.integer() %>%
map_df(~ df %>% filter(sim_category %in% 1:.x) %>% mutate(sim_category = .x)) %>%
ggplot(aes(ID, value, frame = factor(sim_category))) +
geom_boxplot(position = "identity")
gganimate(p)

Resources