This question already exists:
R: Convert "grob" (graphical object) to "ggplot" [duplicate]
Closed 2 years ago.
I working with the R programming language. I am trying to convert a "grob" object into a "ggplot" object (the goal is eventually to convert the ggplot object into a "plotly" object).
I am looking for "the most simple" way to convert "grob" to "ggplot" - the computer I am using does not have a USB port or an internet connection, it only has R with some preloaded libraries (e.g. ggplot2, ggpubr)
In my example: I generated some data, ran a statistical model ("random forest") and plotted the results using "compressed" axis ("Tsne"). The code below can be copy/pasted into R, and the resulting "plot" ("final_plot") is the object that I want to convert to "ggplot":
library(cluster)
library(Rtsne)
library(dplyr)
library(randomForest)
library(caret)
library(ggplot2)
library(plotly)
#PART 1 : Create Data
#generate 4 random variables : response_variable ~ var_1 , var_2, var_3
var_1 <- rnorm(10000,1,4)
var_2<-rnorm(10000,10,5)
var_3 <- sample( LETTERS[1:4], 10000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
response_variable <- sample( LETTERS[1:2], 10000, replace=TRUE, prob=c(0.4, 0.6) )
#put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, response_variable)
#declare var_3 and response_variable as factors
f$response_variable = as.factor(f$response_variable)
f$var_3 = as.factor(f$var_3)
#create id
f$ID <- seq_along(f[,1])
#PART 2: random forest
#split data into train set and test set
index = createDataPartition(f$response_variable, p=0.7, list = FALSE)
train = f[index,]
test = f[-index,]
#create random forest statistical model
rf = randomForest(response_variable ~ var_1 + var_2 + var_3, data=train, ntree=20, mtry=2)
#have the model predict the test set
pred = predict(rf, test, type = "prob")
labels = as.factor(ifelse(pred[,2]>0.5, "A", "B"))
confusionMatrix(labels, test$response_variable)
#PART 3: Visualize in 2D (source: https://dpmartin42.github.io/posts/r/cluster-mixed-types)
gower_dist <- daisy(test[, -c(4,5)],
metric = "gower")
gower_mat <- as.matrix(gower_dist)
labels = data.frame(labels)
labels$ID = test$ID
tsne_obj <- Rtsne(gower_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(cluster = factor(labels$labels),
name = labels$ID)
plot = ggplot(aes(x = X, y = Y), data = tsne_data) +
geom_point(aes(color = labels$labels))
plotly_plot = ggplotly(plot)
a = tsne_obj$Y
a = data.frame(a)
data = a
data$class = labels$labels
decisionplot <- function(model, data, class = NULL, predict_type = "class",
resolution = 100, showgrid = TRUE, ...) {
if(!is.null(class)) cl <- data[,class] else cl <- 1
data <- data[,1:2]
k <- length(unique(cl))
plot(data, col = as.integer(cl)+1L, pch = as.integer(cl)+1L, ...)
# make grid
r <- sapply(data, range, na.rm = TRUE)
xs <- seq(r[1,1], r[2,1], length.out = resolution)
ys <- seq(r[1,2], r[2,2], length.out = resolution)
g <- cbind(rep(xs, each=resolution), rep(ys, time = resolution))
colnames(g) <- colnames(r)
g <- as.data.frame(g)
### guess how to get class labels from predict
### (unfortunately not very consistent between models)
p <- predict(model, g, type = predict_type)
if(is.list(p)) p <- p$class
p <- as.factor(p)
if(showgrid) points(g, col = as.integer(p)+1L, pch = ".")
z <- matrix(as.integer(p), nrow = resolution, byrow = TRUE)
contour(xs, ys, z, add = TRUE, drawlabels = FALSE,
lwd = 2, levels = (1:(k-1))+.5)
invisible(z)
}
model <- randomForest(class ~ ., data=data, mtry=2, ntrees=500)
#this is the final plot
final_plot = decisionplot(model, data, class = "class", main = "rf (1)")
From here, I am trying to convert this object ("final_plot") into a ggplot object:
library(ggpubr)
final = ggpubr::as_ggplot(final_plot)
But this gives me the following error:
Error in gList(...) : only 'grobs' allowed in "gList"
From here, I eventually would have wanted to use this command to convert the ggplot into a plotly object:
plotly_plot = ggplotly(final)
Does anyone know if there is a straightforward way to convert "final_plot" into a ggplot object? (and then plotly)? I don't have the ggplotify library.
Thanks
I am using the R programming language. I made an earlier post (R: Using "microbenchmark" and ggplot2 to plot runtimes) where I am learning how to use loops and functions to iterate procedures (7 procedures) in R for sample sizes. Once this is done, I want to produce a plot.
Based on the previous answer, I tried to write a few of these loops in R:
library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)
#simulate data
var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )
#put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)
#declare var_3 and response_variable as factors
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)
# configure run sizes
sizes <- c(100,200,300,400,500,600,700,800,900,1000)
# Procedure 1: :
proc1 <- function(size){
assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
metric = "gower"),envir = .GlobalEnv)
assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
envir = .GlobalEnv)
}
proc1List <- lapply(sizes,function(x){
b <- microbenchmark(proc1(x))
b$obs <- x
b
})
proc1summary <- do.call(rbind,(proc1List))
#procedure2
proc2 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=3),envir = .GlobalEnv)}
proc2List <- lapply(sizes,function(x){
b <- microbenchmark(proc2(x))
b$obs <- x
b
})
proc2summary <- do.call(rbind,(proc2List))
#procedure3
proc3 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=5),envir = .GlobalEnv)}
proc3List <- lapply(sizes,function(x){
b <- microbenchmark(proc3(x))
b$obs <- x
b
})
proc3summary <- do.call(rbind,(proc3List))
#procedure4
proc4 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID) ,envir = .GlobalEnv)}
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
proc4summary <- do.call(rbind,(proc4List))
#procedure5
proc5 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, perplexity = 10, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID) ,envir = .GlobalEnv)}
proc5List <- lapply(sizes,function(x){
b <- microbenchmark(proc5(x))
b$obs <- x
b
})
proc5summary <- do.call(rbind,(proc5List))
#procedure6
proc6 <- function(size){
assign(paste0("plot_",size),ggplot(aes(x = X, y = Y), data = tsne_data) + geom_point(aes()),envir = .GlobalEnv)}
proc6List <- lapply(sizes,function(x){
b <- microbenchmark(proc6(x))
b$obs <- x
b
})
proc6summary <- do.call(rbind,(proc6List))
#procedure 7
proc7 <- function(size) {
assign(paste0 ("tsne_obj_", size), Rtsne(gower_dist, is_distance = TRUE), envir = .GlobalEnv)
assign(paste0 ("tsne_data_", size), tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID,
lof=lof,
var1=f$var_1,
var2=f$var_2,
var3=f$var_3
), envir = .GlobalEnv)
assign(paste0 ("p1_", size), ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1,
var2=var2, var3=var3), data = tsne_data) +
geom_point(shape=1, col="red") + theme_minimal(), envir = .GlobalEnv)
assign(paste0 ("plotly_", size),
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3")
), envir = .GlobalEnv)
}
proc7List <- lapply(sizes,function(x){
b <- microbenchmark(proc7(x))
b$obs <- x
b
})
proc7summary <- do.call(rbind,(proc7List))
do.call(rbind,list(proc1summary,proc2summary,proc3summary, proc4summary, proc5summary, proc6summary, proc7summary)) %>%
group_by(expr,obs) %>%
summarise(.,time_ms = mean(time) * .000001) -> proc_time
ggplot(proc_time,aes(obs,time_ms,group = expr)) +
geom_line(aes(group = expr),color = "grey80") +
geom_point(aes(color = expr))
However, for some of these procedures, when I call them though a list, I keep getting an error:
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
Error: Problem with `mutate()` input `name`.
x Input `name` can't be recycled to size 100.
i Input `name` is `f$ID`.
i Input `name` must be size 100 or 1, not 1000.
I tried reading other stackoverflow posts (Input `typ` can't be recycled to size in R), but I could not understand why this "recycling error" keeps showing up. Is it because "size = 100" is too small? Is it because some of the variables have been named improperly?
Could someone please tell me what I am doing wrong?
Thanks
In order to make procedures 4 - 7 work we needed to make the adjustments listed in the conclusions section of Using microbenchmark and ggplot2 to plot runtimes:
Wrap the original procedure in a function that we can use as the unit of analysis for microbenchmark(), and include a size argument
Modify the procedure to use size as a variable where necessary
Modify the procedure to access objects from previous steps, based on the size argument
Modify the procedure to write its outputs with assign() and size if these are needed for subsequent procedure steps
The modified code looks like this:
library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)
#simulate data
var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )
#put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)
#declare var_3 and response_variable as factors
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)
# configure run sizes
sizes <- c(10,50,100,200,500,1000)
# configure # of benchmark runs
time_ct <- 10
# Procedure 1: :
proc1 <- function(size){
assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
metric = "gower"),envir = .GlobalEnv)
assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
envir = .GlobalEnv)
}
proc1List <- lapply(sizes,function(x){
b <- microbenchmark(proc1(x),times=time_ct)
b$obs <- x
b
})
proc1summary <- do.call(rbind,(proc1List))
#Procedure 2
proc2 <- function(size){
lof <- lof(get(paste0("gower_dist_",size),envir = .GlobalEnv), k=3)
}
proc2List <- lapply(sizes,function(x){
b <- microbenchmark(proc2(x),times=time_ct)
b$obs <- x
b
})
proc2summary <- do.call(rbind,(proc2List))
#Procedure 3
proc3 <- function(size){
assign(paste0("lof_",size),lof(get(paste0("gower_dist_",size),envir = .GlobalEnv), k=5),
envir = .GlobalEnv)
}
proc3List <- lapply(sizes,function(x){
b <- microbenchmark(proc3(x),times=time_ct)
b$obs <- x
b
})
proc3summary <- do.call(rbind,(proc3List))
proc4 <- function(size){
tsne_obj <- Rtsne(get(paste0("gower_dist_",size),envir = .GlobalEnv),
perplexity = min(30,(size-1)/3),
is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = 1:size)
}
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x),times=time_ct)
b$obs <- x
b
})
proc4summary <- do.call(rbind,(proc4List))
proc5 <- function(size){
tsne_obj <- Rtsne(get(paste0("gower_dist_",size),envir = .GlobalEnv),
perplexity = min(10,(size-1)/3),
is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = 1:size)
assign(paste0("tsne_data_",size),tsne_data,envir = .GlobalEnv)
}
proc5List <- lapply(sizes,function(x){
b <- microbenchmark(proc5(x),times=time_ct)
b$obs <- x
b
})
proc5summary <- do.call(rbind,(proc5List))
proc6 <- function(size){
plot = ggplot(aes(x = X, y = Y), data = get(paste0("tsne_data_",size),envir = .GlobalEnv)) + geom_point(aes())
}
proc6List <- lapply(sizes,function(x){
b <- microbenchmark(proc6(x),times=time_ct)
b$obs <- x
b
})
proc6summary <- do.call(rbind,(proc6List))
proc7 <- function(size){
tsne_obj <- Rtsne(get(paste0("gower_dist_",size),envir = .GlobalEnv),
perplexity = min(30,(size-1)/3),
is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = 1:size,
lof=get(paste0("lof_",size),envir = .GlobalEnv),
var1=f$var_1[1:size],
var2=f$var_2[1:size],
var3=f$var_3[1:size]
)
p1 <- ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1,
var2=var2, var3=var3), data = tsne_data) +
geom_point(shape=1, col="red")+
theme_minimal()
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3"))
}
proc7List <- lapply(sizes,function(x){
b <- microbenchmark(proc7(x),times=time_ct)
b$obs <- x
b
})
proc7summary <- do.call(rbind,(proc7List))
do.call(rbind,list(proc1summary,proc2summary,proc3summary,proc4summary,proc5summary,
proc6summary,proc7summary)) %>%
group_by(expr,obs) %>%
summarise(.,time_ms = mean(time) * .000001) -> proc_time
head(proc_time)
ggplot(proc_time,aes(obs,time_ms,group = expr)) +
geom_line(aes(group = expr),color = "grey80") +
geom_point(aes(color = expr))
...and the output:
Notes
Since some of these procedures take a long time to run (relatively speaking), we reduced the number of iterations in microbenchmark() from the default of 100 to 10, which is more than sufficient to demonstrate that the code works as intended. Since this was set as a parameter at the top of the code, one can increase this number to increase the number of times each benchmark is executed.
In the past i've used to using PCA and loading plots to visualise data using stats::prcomp and ggbiplot. Like this:
I've recently been introduced to t-SNE analysis (late to the game here) that has been revolutionary in reduction analysis and exploring patterns in data.
While t-SNE is picking up clusters in my data that PCA can't clearly distinguish, the downside is that there's no loading plot to understand which characteristic influences each cluster in space.
Is there any way with t-SNE I can form some sort of loading plot? Or are there any complementary methods associated with t-SNE that can be used? I'd really like to keep the t-SNE plot over the PCA plot for visualizing a particular dataset, but without the loading plot it's not as informative.
Any ideas? Dummy data below:
library(FD)
library(ggbiplot)
# generate random data
a <- sample(x = 1:5, size = 100, replace = TRUE)
b <- sample(x = 1:5, size = 100, replace = TRUE)
c <- sample(x = 1:5, size = 100, replace = TRUE)
d <- sample(x = 1:5, size = 100, replace = TRUE)
e <- sample(x = 1:5, size = 100, replace = TRUE)
f <- sample(x = 1:5, size = 100, replace = TRUE)
g <- sample(x = 1:5, size = 100, replace = TRUE)
# make ordinal
a <- ordered(a, levels = 1:5)
b <- ordered(b, levels = 1:5)
c <- ordered(c, levels = 1:5)
d <- ordered(d, levels = 1:5)
e <- ordered(e, levels = 1:5)
f <- ordered(f, levels = 1:5)
g <- ordered(g, levels = 1:5)
#as dataframe
table <- as.data.frame(cbind(a,b,c,d,e,f,g))
#PCA
pca <- prcomp(table,center = TRUE,scale. = TRUE)
ggbiplot(pca)
#t-SNE
gower_dist <- daisy(table, metric = "gower")
tsne_obj <- Rtsne(gower_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y"))
ggplot(aes(x = X, y = Y), data = tsne_data) +
geom_point()
I need to plot a time series in Dygraphs, where multiple time series are plotted together.
As data we can use
df <- cbind(mdeaths, fdeaths)
as is done on dygraphs website: https://rstudio.github.io/dygraphs/
However I would like to make the prediction of both time series continue in the same image as the original data. I have made a crude drawing of what I want to achieve
One way is naturally to make the predictions with i.e auto.arima separately and then combine the data once again. I am wondering if there is a functionality, that can do it all in one shot?
I think what you mean is answered in "Upper/Lower Bars" section at the same website: https://rstudio.github.io/dygraphs/gallery-upper-lower-bars.html.
You need to make your prediction model, for example:
hw <- HoltWinters(mdeaths)
p <- predict(hw, n.ahead = 36, prediction.interval = TRUE)
all <- cbind(ldeaths, p)
and plot de graph:
dygraph(all, "Deaths from Lung Disease (UK)") %>%
dySeries("mdeaths", label = "Actual") %>%
dySeries(c("p.lwr", "p.fit", "p.upr"), label = "Predicted")
An easy solution is:
hw <- HoltWinters(mdeaths)
predictedMen <- predict(hw, n.ahead = 12 ,prediction.interval = TRUE)
hw <- HoltWinters(fdeaths)
predictedWomen <- predict(hw, n.ahead = 12 ,prediction.interval = TRUE)
predictedMen %>% class
AuxF <- function(x){
x <- data.frame(date= as.Date(x), Vuelos=as.matrix(x) )
x <- read.zoo(x, format = "%Y-%m-%d")
return(x)
}
DF <- zoo(
cbind(
rbind( cbind( upr = AuxF(mdeaths), fit= AuxF(mdeaths) , lwr = AuxF(mdeaths))
, cbind( upr = AuxF(predictedMen[ , 2]), fit= AuxF(predictedMen[ , 1]) , lwr =
AuxF(predictedMen[ , 3])) )
, rbind( cbind( upr = AuxF(fdeaths), fit = AuxF(fdeaths) , lwr = AuxF(fdeaths))
, cbind( upr = AuxF(predictedWomen[ , 2]), fit = AuxF(predictedWomen[ , 1]) ,
wr = AuxF(predictedWomen[ , 3])) )
) )
names(DF) <- c("predictedMen.upr", "predictedMen.fit", "predictedMen.lwr",
"predictedWomen.upr", "predictedWomen.fit", "predictedWomen.lwr")
dygraph(DF, main = "Predicted Lung Deaths (UK)") %>%
dyAxis("x", drawGrid = FALSE) %>%
dySeries(c("predictedMen.upr", "predictedMen.fit", "predictedMen.lwr"), label =
"Men")
%>% dySeries(c("predictedWomen.upr", "predictedWomen.fit", "predictedWomen.lwr"),
label = "Women") %>%
dyOptions(colors = RColorBrewer::brewer.pal(3, "Set1")) %>%
dyRangeSelector(height = 200)