add labels to facet_wrap boxplot - r

I'm trying to modify the facet labels in the plot created in this post.
To repeat the code:
library(ggplot2)
mymatrix1 <- matrix(data = 0, nrow = 105, ncol =2)
mymatrix2 <- matrix(data = 0, nrow = 108, ncol =2)
mymatrix3 <- matrix(data = 0, nrow = 112, ncol =2)
mymatrix1[,1] <- sample(0:1, 105, replace= TRUE)
mymatrix1[,2] <- rnorm(105, 52, 2)
mymatrix2[,1] <- sample(0:1, 108, replace= TRUE)
mymatrix2[,2] <- rnorm(108, 60, 2)
mymatrix3[,1] <- sample(0:1, 112, replace= TRUE)
mymatrix3[,2] <- rnorm(112, 70, 2)
mydata <- list(mymatrix1, mymatrix2,mymatrix3)
for(i in 1:3){
mydata[[i]] <- cbind(mydata[[i]], i)
colnames(mydata[[i]]) <- c("class", "readcount", "group")
}
mydata <- as.data.frame(do.call(rbind, mydata))
## fixed scales
p <- qplot(class, readcount, data = mydata, geom="boxplot", fill = factor(class)) +
geom_jitter(position=position_jitter(w=0.1,h=0.1)) +
scale_x_continuous(breaks=c(0,1), labels=c("0", "1")) +
facet_wrap(~ group)
## free scales
p + facet_wrap(~ group, scales = "free")
My goal, in this plot, is to change the facet labels 1, 2 and 3 to something like "a" "b" or "c."
I thought that simply changing the data frame column "group" to letters instead of numbers in the for loop:
for(i in 1:3){
mydata[[i]] <- cbind(mydata[[i]], letters[i])
colnames(mydata[[i]]) <- c("class", "readcount", "group")
}
would work, but I get this error:
Error: Discrete value supplied to continuous scale.
Any suggestions? I looked at the other posts for changing facet labels, but they didn't seem to work for this type of plot (at least for me).
Thanks so much.

Are you looking for something like this?
mydata$group <- as.factor(mydata$group)
levels(mydata$group) <- c("A","B","C")

Related

Binning two columns of data frame together in R

I would like to bin two columns of a dataset simultaneously to create one common binned column. The simple code is as follows
x <- sample(100)
y <- sample(100)
data <- data.frame(x, y)
xbin <- seq(from = 0, to = 100, by = 10)
ybin <- seq(from = 0, to = 100, by = 10)
Any help is appreciated!
Not sure if this is what you are looking for
library(tidyverse)
x <- sample(100)
y <- sample(100)
data <- data.frame(x, y)
xbin <- seq(from = 0, to = 100, by = 10)
ybin <- seq(from = 0, to = 100, by = 10)
data <- data%>%
dplyr::mutate(
x_binned = cut(x, breaks = seq(0,100,10)),
y_binned = cut(y, breaks = seq(0,100,10))
)
data %>%
ggplot() +
geom_bin_2d(
aes(x = x_binned, y = y_binned), binwidth = c(10,10), colour = "red") +
theme_minimal()
After asking in the comments I am still not quite shure, what the desired answer would look like but I hope, that one of the two answers in the below code will work for you:
x <- sample(100)
y <- sample(100)
data <- data.frame(x, y)
xbin <- seq(from = 0, to = 100, by = 10)
ybin <- seq(from = 0, to = 100, by = 10)
data$xbin <- cut(data$x, breaks = xbin, ordered = TRUE)
data$ybin <- cut(data$y, breaks = ybin, ordered = TRUE)
data$commonbin1 <- paste0(data$xbin, data$ybin)
data$commonbin2 <- paste0("(",as.numeric(data$xbin),";", as.numeric(data$ybin),")")
head(data, 20)
This will construct a common binning variable commonbin1 that includes the bin-limits in the names of the bins and commonbin2 which will be easier to compare to the plot mentioned in the comment.

Plotting multiple function curves in ggplot using a loop or lapply

Suppose I have the following data:
coef <- list(c(47, 2, 0, 0),
c(7, 42, -8, 0),
c(78, -71, 43, -7))
my_data <- data.frame("x" = rep(1:4, times = 20))
cols <- c("1", "2", "3")
I would like to plot a function for each vector of coefficients in the object coef.
However, using a for loop does not work.
library(ggplot2)
g1 <- ggplot(data = my_data, aes(x = x))
for(i in 1:3){
my_fun <- function(x){
sum(as.vector(outer(x, 0:3, FUN="^")) * coef[[i]])
}
my_fun <- Vectorize(my_fun)
g1 <- (g1 + stat_function(fun = my_fun, aes(col = cols[i]), lwd = 1.2))
print(g1)
}
The result is supposed to look (somehow) like this:
Is there a way to use lapply instead of the loop? Or can I modify the for-loop to fix this problem?

ggplot, plotting in multiple pages with different rows

I wanted to make a graph using facet_wrap and plot it in different pages in a pdf file. I've read son many options, and this works:
R + ggplot: plotting over multiple pages
but only when you have the same rows in each page.
I have this demo data to try explain my case:
A <- data.frame(TIME = rep(c(0, 5, 10, 15, 30, 45, 60), 5))
A$C <- (1 - exp(-0.2*A$TIME))
A$ID <- rep(1:5, each = 7)
A$R <- rnorm(35, mean = 1, sd = 0.01)
A$C2 <- A$C*A$R
Pages <- 5
A2 <- A[c(1,4:8,10:22,24:35),]
So, I have ID with different number of observations. I tried to make a vector with the number of observation in each ID (I want an ID per page), but it doesn't work.
nrws <- ddply(A2, .(ID), "nrow")
nsamp <- nrws[,2]
pdf("Test.pdf")
for (i in seq(Pages))
{
slice = seq(((i-1)*nsamp[i]),(i*nsamp[i]))
slice2 = slice[!(slice > nrow(A2))]
A3 = A2[slice2,]
p1 <- ggplot(A3, aes(x = TIME, y = C2)) +
geom_line(size = 0.5) +
geom_point(size = 1) +
facet_wrap(~ID)
print(p1)
}
dev.off()
Could you help me?
Thanks in advances,
Nacho
I think you were overthinking trying to calculate your "slices". Maybe you want this?
Not entirely sure. If you only want one ID per page you don't need facet_wrap, and you will probably need to set the scale explicitly to keep it the same from page to page.
library(plyr)
A <- data.frame(TIME = rep(c(0, 5, 10, 15, 30, 45, 60), 5))
A$C <- (1 - exp(-0.2*A$TIME))
A$ID <- rep(1:5, each = 7)
A$R <- rnorm(35, mean = 1, sd = 0.01)
A$C2 <- A$C*A$R
Pages <- 5
A2 <- A[c(1,4:8,10:22,24:35),]
nrws <- ddply(A2, .(ID), "nrow")
nsamp <- nrws[,2]
pdf("Test.pdf")
for (i in seq(Pages))
{
# slice = seq(((i-1)*nsamp[i]),(i*nsamp[i]))
# slice2 = slice[!(slice > nrow(A2))]
# A3 = A2[slice2,]
A3 = A2[A2$ID==i,]
p1 <- ggplot(A3, aes(x = TIME, y = C2)) +
geom_line(size = 0.5) +
geom_point(size = 1) +
facet_wrap(~ID)
print(p1)
}
dev.off()

Attaching multiple covariates to ggplot scatterplot

I'm producing a plot like this:
library(ggplot2)
data.dist = matrix(
c(10, -10, 10, -10, 10, -10, 10, -10, 10),
nrow=3,
ncol=3,
byrow = TRUE)
hc <- agnes(dist(data.dist), method = "ward", diss = TRUE)
cluster <- cutree(hc, k=2)
xy <- data.frame(cmdscale(dist(data.dist)), factor(cluster))
names(xy) <- c("x", "y", "cluster")
xy$model <- rownames(xy)
ggplot(xy, aes(x, y)) + geom_point(aes(colour=cluster), size=3)
Which gives me:
However, let's say I want to attach another covariate, say a binary variable c(1, 0, 1) to the data and display all 1 using one symbol (say an X) and all 0 using another symbol (say a dot). How can I accomplish this?
xy<-data.frame(x=rnorm(3),y=rnorm(3),cluster=as.factor(c(1,0,1)),another=as.factor(c(1,1,0)) )
ggplot(xy, aes(x, y,shape=another)) + geom_point(aes(colour=cluster), size=3)

Multiple density graphs different groups (based on factor level) using plyr

I am trying to output multiple density plot from a function, by dividing the dataframe into pieces such that separate density for each level of a factor for corresponding yvar.
set.seed(1234)
Aa = c(rnorm(40000, 50, 10))
Bb = c(rnorm(4000, 70, 10))
Cc = c(rnorm(400, 75, 10))
Dd = c(rnorm(40, 80, 10))
yvar = c(Aa, Bb, Cc, Dd)
gen <- c(rep("Aa", length(Aa)),rep("Bb", length(Bb)), rep("Cc", length(Cc)),
rep("Dd", length(Dd)))
mydf <- data.frame(gen, yvar)
minyvar <- min(yvar)
maxyvar <- max(yvar)
par(mfrow = c(length(levels(mydf$gen)),1))
plotdensity <- function (xf, minyvar, maxyvar){
plot(density(xf), xlim=c(minyvar, maxyvar), main = paste (names(xf),
"distribution", sep = ""))
dens <- density(xf)
x1 <- min(which(dens$x >= quantile(xf, .80)))
x2 <- max(which(dens$x < max(dens$x)))
with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="blu4"))
abline(v= mean(xf), col = "black", lty = 1, lwd =2)
}
require(plyr)
ddply(mydf, .(mydf$gen), plotdensity, yvar, minyvar, maxyvar)
Error in .fun(piece, ...) : unused argument(s) (111.544494112914)
My specific expectation are each plot is named by name of level for example Aa, Bb, Cc, Dd
Arrangement of the graphs see the parameter set, so that we compare density changes and means. compact - Low space between the graphs.
Help appreciated.
Edits:
The following graphs are individually produced, although I want to develop a function that can be applicable to x level for a factor.
I see that #Andrie just beat me to most of this. I'm still going to post my answer, since filling only certain quantiles of the distribution requires a slightly different approach.
set.seed(1234)
Aa = c(rnorm(40000, 50, 10))
Bb = c(rnorm(4000, 70, 10))
Cc = c(rnorm(400, 75, 10))
Dd = c(rnorm(40, 80, 10))
yvar = c(Aa, Bb, Cc, Dd)
gen <- c(rep("Aa", length(Aa)),rep("Bb", length(Bb)), rep("Cc", length(Cc)),
rep("Dd", length(Dd)))
mydf <- data.frame(grp = gen,x = c(Aa,Bb,Cc,Dd))
#Calculate the densities and an indicator for the desire quantile
# for later use in subsetting
mydf <- ddply(mydf,.(grp),.fun = function(x){
tmp <- density(x$x)
x1 <- tmp$x
y1 <- tmp$y
q80 <- x1 >= quantile(x$x,0.8)
data.frame(x=x1,y=y1,q80=q80)
})
#Separate data frame for the means
mydfMean <- ddply(mydf,.(grp),summarise,mn = mean(x))
ggplot(mydf,aes(x = x)) +
facet_wrap(~grp) +
geom_line(aes(y = y)) +
geom_ribbon(data = subset(mydf,q80),aes(ymax = y),ymin = 0, fill = "black") +
geom_vline(data = mydfMean,aes(xintercept = mn),colour = "black")
Here is a way of doing it in ggplot:
set.seed(1234)
mydf <- rbind(
data.frame(gen="Aa", yvar= rnorm(40000, 50, 10)),
data.frame(gen="Bb", yvar=rnorm(4000, 70, 10)),
data.frame(gen="Cc", yvar=rnorm(400, 75, 10)),
data.frame(gen="Dd", yvar=rnorm(40, 80, 10))
)
labels <- ddply(mydf, .(gen), nrow)
means <- ddply(mydf, .(gen), summarize, mean=mean(yvar))
ggplot(mydf, aes(x=yvar)) +
stat_density(fill="blue") +
facet_grid(gen~.) +
theme_bw() +
geom_vline(data=means, aes(xintercept=mean), colour="red") +
geom_text(data=labels, aes(label=paste("n =", V1)), x=5, y=0,
hjust=0, vjust=0) +
opts(title="Distribution")
With sincere thanks to joran and Andrie, the following is just compilation of my favorite from above two posts, just some of readers might want to see.
require(ggplot2)
set.seed(1234)
Aa = c(rnorm(40000, 50, 10))
Bb = c(rnorm(4000, 70, 10))
Cc = c(rnorm(400, 75, 10))
Dd = c(rnorm(40, 80, 10))
yvar = c(Aa, Bb, Cc, Dd)
gen <- c(rep("Aa", length(Aa)),rep("Bb", length(Bb)), rep("Cc", length(Cc)),
rep("Dd", length(Dd)))
mydf <- data.frame(grp = gen,x = c(Aa,Bb,Cc,Dd))
mydf1 <- mydf
#Calculate the densities and an indicator for the desire quantile
# for later use in subsetting
mydf <- ddply(mydf,.(grp),.fun = function(x){
tmp <- density(x$x)
x1 <- tmp$x
y1 <- tmp$y
q80 <- x1 >= quantile(x$x,0.8)
data.frame(x=x1,y=y1,q80=q80)
})
#Separate data frame for the means
mydfMean <- ddply(mydf,.(grp),summarise,mn = mean(x))
labels <- ddply(mydf1, .(grp), nrow)
ggplot(mydf,aes(x = x)) +
facet_grid(grp~.) +
geom_line(aes(y = y)) +
geom_ribbon(data = subset(mydf,q80),aes(ymax = y),ymin = 0,
fill = "black") +
geom_vline(data = mydfMean,aes(xintercept = mn),
colour = "black") + geom_text(data=labels,
aes(label=paste("n =", labels$V1)), x=5, y=0,
hjust=0, vjust=0) +
opts(title="Distribution") + theme_bw()

Resources