How to use 'facet' to create multiple density plot in GGPLOT - r

I have the following data created on the fly:
> df <- data.frame( cbind(rnorm(200),rnorm(200, mean=.8),rnorm(200, mean=.9),rnorm(200, mean=1),rnorm(200, mean=.2),rnorm(200, mean=.3)),rnorm(200, mean=4),rnorm(200, mean=.5))
> colnames(df) <- c("w.cancer","w.normal","x.cancer","x.normal","y.cancer","y.normal","z.cancer","z.normal")
> df_log<-log2(df) # ignore the warning with NA
> head(df_log)
What I want to do is to create multiple plots in one panel like the sketch below using 'facet'.
How can I go about it?

You'll have to prepare your data first. I've illustrated this on your data.frame df as it is a proper normal distribution.
require(ggplot2)
require(reshape2)
df$id <- 1:nrow(df)
df.m <- melt(df, "id")
df.m$grp1 <- factor(gsub("\\..*$", "", df.m$variable))
df.m$grp2 <- factor(gsub(".*\\.", "", df.m$variable))
p <- ggplot(data = df.m, aes(x=value)) + geom_density(aes(fill=grp2), alpha = 0.4)
p <- p + facet_wrap( ~ grp1)
p + scale_fill_brewer(palette = "Set1")
Doing the same by replacing df with df_log you'd get something like this:
require(ggplot2)
require(reshape2)
df_log$id <- 1:nrow(df_log)
df.m <- melt(df_log, "id")
df.m$grp1 <- factor(gsub("\\..*$", "", df.m$variable))
df.m$grp2 <- factor(gsub(".*\\.", "", df.m$variable))
p <- ggplot(data = df.m, aes(x=value)) + geom_density(aes(fill=grp2), alpha = 0.5)
p <- p + facet_wrap( ~ grp1)
p

Related

Creating graphs in a loop: one graph different from the others

I have an elaborate code to create a series of graphs. I would like to put a vertical line in one of the many graphs I create.
Consider the following simple code:
library(ggplot2)
library(grid)
library(gridExtra)
plots <- list()
for (i in 1:4) {
V1 <- rnorm(1000)
V2 <- seq(1000)
df <- data.frame(V1, V2)
plots[[i]] <- ggplot(df, aes(x= V2, y=V1)) +
geom_point()+
geom_vline(xintercept = 500, color="red")
}
grid.arrange(grobs=plots, nrow=2)
I would like to have the red vertical line for graph 4 but not the others. How would I do this efficiently?
You don't need a for-loop and if-statement for this matter. You can use faceting;
library(ggplot2)
library(grid)
library(gridExtra)
library(dplyr)
set.seed(123) ## set the seed for random numbers to be reproducible
df <- bind_rows(lapply(1:4, function(x)
data.frame(V1=rnorm(1000), V2=seq(1000))), .id = 'facet')
ggplot(df, aes(x= V2, y=V1)) +
geom_point() +
facet_wrap(~facet) +
geom_vline(data=data.frame(xint=500,facet=4), aes(xintercept = xint), color = "red")
just split your plot production and set a condition :)
library(ggplot2)
library(grid)
library(gridExtra)
plots <- list()
for (i in 1:4) {
V1 <- rnorm(1000)
V2 <- seq(1000)
df <- data.frame(V1, V2)
plots[[i]] <- ggplot(df, aes(x= V2, y=V1)) +
geom_point()
if (i == 4) plots[[i]] <- plots[[i]] + geom_vline(xintercept = 500, color="red")
}
grid.arrange(grobs=plots, nrow=2)

Question about ggplot, for loop and marrangeGrob. Does anyone know how to fix the code when all plots are the same? [duplicate]

I've written a for loop which goes through the columns of a dataframe and produces a graph for each column using ggplot. The problem is the graphs that are output are all the same - they're all graphs of the final column.
The code I've used is:
library(gridExtra)
library(ggplot2)
test1 <- c("Person1","Person2","Person3","Person4","Person5")
test2 <- as.data.frame(c(1,2,3,4,5))
test3 <- as.data.frame(c(2,2,2,2,2))
test4 <- as.data.frame(c(1,3,5,3,1))
test5 <- as.data.frame(c(5,4,3,2,1))
test <- cbind(test1,test2,test3,test4,test5)
rm(test1,test2,test3,test4,test5)
colnames(test) <- c("Person","var1","var2","var3","var4")
for(i in 2:5){
nam <- paste0("graph", i-1)
graph_temp <- ggplot(test, aes(Person, test[,i])) + geom_bar(stat = "identity")
assign(nam, graph_temp)
}
grid.arrange(graph1, graph2, graph3, graph4, ncol=2)
What I'm aiming for is the plot from this code:
library(gridExtra)
library(ggplot2)
test1 <- c("Person1","Person2","Person3","Person4","Person5")
test2 <- as.data.frame(c(1,2,3,4,5))
test3 <- as.data.frame(c(2,2,2,2,2))
test4 <- as.data.frame(c(1,3,5,3,1))
test5 <- as.data.frame(c(5,4,3,2,1))
test <- cbind(test1,test2,test3,test4,test5)
rm(test1,test2,test3,test4,test5)
colnames(test) <- c("Person","var1","var2","var3","var4")
graph1 <- ggplot(test, aes(Person, test[,2])) + geom_bar(stat = "identity")
graph2 <- ggplot(test, aes(Person, test[,3])) + geom_bar(stat = "identity")
graph3 <- ggplot(test, aes(Person, test[,4])) + geom_bar(stat = "identity")
graph4 <- ggplot(test, aes(Person, test[,5])) + geom_bar(stat = "identity")
grid.arrange(graph1, graph2, graph3, graph4, ncol=2)
I know there's a similar question on saving ggplots in a for loop, but I've not managed to get that one to work for this problem.
Here's a more concise way to produce your example:
df <- data.frame(
Person = paste0("Person", 1:5),
var1 = c(1,2,3,4,5),
var2 = c(2,2,2,2,2),
var3 = c(1,3,5,3,1),
var4 = c(5,4,3,2,1)
)
Now, about your plots.
Best solution
Reshape the data frame to 'long' format, and then use facets:
library(ggplot2)
library(tidyr)
gather(df, var, value, -Person) %>%
ggplot(aes(Person, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ var)
Otherwise…
If you gotta stick with a data structure that looks like what you posted, then use aes_string:
library(ggplot2)
library(gridExtra)
g <- lapply(1:4, function(i) {
ggplot(df, aes_string("Person", paste0("var", i))) +
geom_bar(stat = "identity")
})
grid.arrange(grobs = g, ncol = 2)

ggplot for loop outputs all the same graph

I've written a for loop which goes through the columns of a dataframe and produces a graph for each column using ggplot. The problem is the graphs that are output are all the same - they're all graphs of the final column.
The code I've used is:
library(gridExtra)
library(ggplot2)
test1 <- c("Person1","Person2","Person3","Person4","Person5")
test2 <- as.data.frame(c(1,2,3,4,5))
test3 <- as.data.frame(c(2,2,2,2,2))
test4 <- as.data.frame(c(1,3,5,3,1))
test5 <- as.data.frame(c(5,4,3,2,1))
test <- cbind(test1,test2,test3,test4,test5)
rm(test1,test2,test3,test4,test5)
colnames(test) <- c("Person","var1","var2","var3","var4")
for(i in 2:5){
nam <- paste0("graph", i-1)
graph_temp <- ggplot(test, aes(Person, test[,i])) + geom_bar(stat = "identity")
assign(nam, graph_temp)
}
grid.arrange(graph1, graph2, graph3, graph4, ncol=2)
What I'm aiming for is the plot from this code:
library(gridExtra)
library(ggplot2)
test1 <- c("Person1","Person2","Person3","Person4","Person5")
test2 <- as.data.frame(c(1,2,3,4,5))
test3 <- as.data.frame(c(2,2,2,2,2))
test4 <- as.data.frame(c(1,3,5,3,1))
test5 <- as.data.frame(c(5,4,3,2,1))
test <- cbind(test1,test2,test3,test4,test5)
rm(test1,test2,test3,test4,test5)
colnames(test) <- c("Person","var1","var2","var3","var4")
graph1 <- ggplot(test, aes(Person, test[,2])) + geom_bar(stat = "identity")
graph2 <- ggplot(test, aes(Person, test[,3])) + geom_bar(stat = "identity")
graph3 <- ggplot(test, aes(Person, test[,4])) + geom_bar(stat = "identity")
graph4 <- ggplot(test, aes(Person, test[,5])) + geom_bar(stat = "identity")
grid.arrange(graph1, graph2, graph3, graph4, ncol=2)
I know there's a similar question on saving ggplots in a for loop, but I've not managed to get that one to work for this problem.
Here's a more concise way to produce your example:
df <- data.frame(
Person = paste0("Person", 1:5),
var1 = c(1,2,3,4,5),
var2 = c(2,2,2,2,2),
var3 = c(1,3,5,3,1),
var4 = c(5,4,3,2,1)
)
Now, about your plots.
Best solution
Reshape the data frame to 'long' format, and then use facets:
library(ggplot2)
library(tidyr)
gather(df, var, value, -Person) %>%
ggplot(aes(Person, value)) +
geom_bar(stat = "identity") +
facet_wrap(~ var)
Otherwise…
If you gotta stick with a data structure that looks like what you posted, then use aes_string:
library(ggplot2)
library(gridExtra)
g <- lapply(1:4, function(i) {
ggplot(df, aes_string("Person", paste0("var", i))) +
geom_bar(stat = "identity")
})
grid.arrange(grobs = g, ncol = 2)

How to plot three point lines using ggplot2 instead of the default plot in R

I have three matrix and I want to plot the graph using ggplot2. I have the data below.
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
w <- matrix(W[4,])
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
I want to add the three plots into one plot using a beautiful ggplot2.
Moreover, I want to make the points with different values have different colors.
I'm not quite sure what you're after, here's a guess
Your data...
max <- c(175523.9, 33026.97, 21823.36, 12607.78, 9577.648, 9474.148, 4553.296, 3876.221, 2646.405, 2295.504)
min <- c(175523.9, 33026.97, 13098.45, 5246.146, 3251.847, 2282.869, 1695.64, 1204.969, 852.1595, 653.7845)
w <- c(175523.947, 33026.971, 21823.364, 5246.146, 3354.839, 2767.610, 2748.689, 1593.822, 1101.469, 1850.013)
Slight modification to your base plot code to make it work...
plot(1:10,max,type='b',xlab='Number',ylab='groups',col=3)
points(1:10,min,type='b', col=2)
points(1:10,w,type='b',col=1)
Is this what you meant?
If you want to reproduce this with ggplot2, you might do something like this...
# ggplot likes a long table, rather than a wide one, so reshape the data, and add the 'time' variable explicitly (ie. my_time = 1:10)
require(reshape2)
df <- melt(data.frame(max, min, w, my_time = 1:10), id.var = 'my_time')
# now plot, with some minor customisations...
require(ggplot2); require(scales)
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
UPDATE after the question was edited and the example data changed, here's an edit to suit the new example data:
Here's your example data (there's scope for simplification and speed gains here, but that's another question):
library(cluster)
require(ggplot2)
require(scales)
require(reshape2)
data(ruspini)
x <- as.matrix(ruspini[-1])
wss <- NULL
W=matrix(data=NA,ncol=10,nrow=100)
for(j in 1:100){
k=10
for(i in 1: k){
wss[i]=kmeans(x,i)$tot.withinss
}
W[j,]=as.matrix(wss)
}
max_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
max_Wmk[,i]=max(W[,i],na.rm=TRUE)
}
min_Wmk <- matrix(data=NA, nrow=1,ncol=10)
for(i in 1:10){
min_Wmk[,i]=min(W[,i],na.rm=TRUE)
}
w <- matrix(W[4,])
Here's what you need to do to make the three objects into vectors so you can make the data frame as expected:
max_Wmk <- as.numeric(max_Wmk)
min_Wmk <- as.numeric(min_Wmk)
w <- as.numeric(w)
Now reshape and plot as before...
df <- melt(data.frame(max_Wmk, min_Wmk, w, my_time = 1:10), id.var = 'my_time')
ggplot(df, aes(colour = variable, x = my_time, y = value)) +
geom_point(size = 3) +
geom_line() +
scale_y_continuous(labels = comma) +
theme_minimal()
And here's the result:

ggplot() lines transparency

How to change the transparency level of lines in ggplot() diagram (i.e. histogram, line plot, etc.)?
For instance consider the code below:
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p <- p + geom_density(aes(color=Category), size=2, alpha=.4)
print(p)
I expected the lines would be transparent (as alpha=.4), but they're not.
Simply following #baptiste's directions,
data <- data.frame(a=rnorm(100), b = rnorm(100,.5,1.2))
data <- melt(data)
colnames(data) <- c("Category", "Intensity")
p <- ggplot(data, aes(x=Intensity))
p + geom_line(aes(color=Category), stat="density", size=2, alpha=0.4)

Resources