Varying factor order in each facet of ggplot2 - r

I am trying to create a Cleveland Dot Plot given for two categories in this case J and K. The problem is the elements A,B,C are in both categories so R keeps farting. I have made a simple example:
x <- c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17])
type <- c(rep("J",10),rep("K",10))
y <- rnorm(n=20,10,2)
data <- data.frame(x,y,type)
data
data$type <- as.factor(data$type)
nameorder <- data$x[order(data$type,data$y)]
data$x <- factor(data$x,levels=nameorder)
ggplot(data, aes(x=y, y=x)) +
geom_segment(aes(yend=x), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=type)) +
scale_colour_brewer(palette="Set1", limits=c("J","K"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
Ideally, I would want a dot plot for both categories(J,K) individually with each factor(vector x) decreasing with respect to the y vector. What ends up happening is that both categories aren't going from biggest to smallest and are erratic at the end instead. Please help!

Unfortunately factors can only have one set of levels. The only way i've found to do this is actually to create two separate data.frames from your data and re-level the factor in each. For example
data <- data.frame(
x = c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17]),
y = rnorm(n=20,10,2),
type= c(rep("J",10),rep("K",10))
)
data$type <- as.factor(data$type)
J<-subset(data, type=="J")
J$x <- reorder(J$x, J$y, max)
K<-subset(data, type=="K")
K$x <- reorder(K$x, K$y, max)
Now we can plot them with
ggplot(mapping = aes(x=y, y=x, xend=0, yend=x)) +
geom_segment(data=J, colour="grey50") +
geom_point(data=J, size=3, aes(colour=type)) +
geom_segment(data=K, colour="grey50") +
geom_point(data=K, size=3, aes(colour=type)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
which results in

Related

Adding values from one data set to the ggplots (with several variables)

By using this function, I can add outliers values into the plot of mpg
outlier_values. <- lapply(mtcars[-c(8,9)], function(x){outlier_values <- boxplot.stats(x)$out})
boxplot(mtcars$mpg, main="Pressure Height", boxwex=0.1)
mtext(paste("Outliers: ", paste(outlier_values., collapse=", ")), cex=0.6)
Buy now I want to add the outlier values (outlier1) to the plot of all variables:
library(reshape2)
library(ggplot2)
outlier <- do.call("cbind", lapply(mtcars[-c(8,9)], function(x) boxplot.stats(x)$out))
outlier1 <- melt(outlier)
mtcars_m = melt(mtcars[,-c(8,9)])
names(mtcars_m)=c("X2","CI")
box.plot<- ggplot(mtcars_m, aes(X2, CI,fill=Models)) +
geom_boxplot(width = 0.1) +
facet_wrap(~ Models, scales = "free") +
guides(fill=FALSE) +
labs(x="", y="") +
ggtitle("Box Plots")
How can I do that?
Your code contains some variables which are undefined (Models). I assume you meant X2. Here is the ggplot2 solution:
outlier1 <- melt(data.frame(outlier))
colnames(mtcars_m) <- colnames(outlier1) <- c("X2","CI")
mtcars_m$Outlier <- FALSE
outlier1$Outlier <- TRUE
ggData <- rbind(mtcars_m, outlier1)
ggplot(ggData, aes(x=X2, y=CI, fill=X2) ) +
geom_boxplot() +
geom_point(aes(colour=Outlier)) +
labs(x="",y="") +
ggtitle("Box Plots") +
guides(fill=FALSE) +
facet_wrap(~ X2, scales = "free")

Annotate x-axis with N in faceted plot

I'm trying to produce a boxplot of some numeric outcome broken down by treatment condition and visit number, with the number of observations in each box placed under the plot, and the visit numbers labeled as well. Here's some fake data that will serve to illustrate, and I give two examples of things I've tried that didn't quite work.
library(ggplot2)
library(plyr)
trt <- factor(rep(LETTERS[1:2],150),ordered=TRUE)
vis <- factor(c(rep(1,150),rep(2,100),rep(3,50)),ordered=TRUE)
val <- rnorm(300)
data <- data.frame(trt,vis,val)
data.sum <- ddply(data, .(vis, trt), summarise,
N=length(na.omit(val)))
mytheme <- theme_bw() + theme(panel.margin = unit(0, "lines"), strip.background = element_blank())
The below code produces a plot that has N labels where I want them. It does this by grabbing summary data from an auxiliary dataset I created. However, I couldn't figure out how to also label visit on the x-axis (ideally, below the individual box labels), or to delineate visits visually in other ways (e.g. lines separating them into panels).
plot1 <- ggplot(data) +
geom_boxplot(aes(x=vis:trt,y=val,group=vis:trt,colour=trt), show.legend=FALSE) +
scale_x_discrete(labels=paste(data.sum$trt,data.sum$N,sep="\n")) +
labs(x="Visit") + mytheme
The plot below is closer to what I want than the one above, in that it has a nice hierarchy of treatments and visits, and a pretty format delineating the visits. However, for each panel it grabs the Ns from the first row in the summary data that matches the treatment condition, because it doesn't "know" that each facet needs to use the row corresponding to that visit.
plot2 <- ggplot(data) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=paste(data.sum$trt,data.sum$N,sep="\n")) +
labs(x="Visit") + mytheme
One workaround is to manipulate your dataset so your x variable is the interaction between trt and N.
Working off what you already have, you can add N to the original dataset via a merge.
test = merge(data, data.sum)
Then make a new variable that is the combination of trt and N.
test = transform(test, trt2 = paste(trt, N, sep = "\n"))
Now make the plot, using the new trt2 variable on the x axis and using scales = "free_x" in facet_wrap to allow for the different labels per facet.
ggplot(test) +
geom_boxplot(aes(x = trt2, y = val, group = trt, colour = trt), show.legend = FALSE) +
facet_wrap(~ vis, drop = FALSE, switch="x", nrow = 1, scales = "free_x") +
labs(x="Visit") +
mytheme
Since this functionality isn't built in a good work-around is grid.extra:
library(gridExtra)
p1 <- ggplot(data[data$vis==1,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[1:2]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
p2 <- ggplot(data[data$vis==2,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[3:4]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
p3 <- ggplot(data[data$vis==3,]) + geom_boxplot(aes(x=trt,y=val,group=trt,colour=trt), show.legend=FALSE) +
#facet_wrap(~ vis, drop=FALSE, switch="x", nrow=1) +
scale_x_discrete(labels=lb[5:6]) + #paste(data.sum$trt,data.sum$N,sep="\n")
labs(x="Visit") + mytheme
grid.arrange(p1,p2,p3,nrow=1,ncol=3) # fully customizable
Related:
Varying axis labels formatter per facet in ggplot/R
You can also make them vertical or do other transformations:

R - How to overlay the average of a set of iid RVs

In the code below I build a 40x1000 data frame where in each column I have the cumulative means for successive random draws from an exponential distribution with parameter lambda = 0.2.
I add an additional column to host the specific number of the "draw".
I also calculate the rowmeans as df_means.
How do I add df_means (as a black line) on top of all my simulated RVs? I don't understand ggplot well enough to do this.
df <- data.frame(replicate(1000,cumsum(rexp(40,lambda))/(1:40)))
df$draw <- seq(1,40)
df_means <- rowMeans(df)
Molten <- melt(df, id.vars="draw")
ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none") + geom_line(df_means)
How would I add plot(df_means, type="l") to my ggplot, below?
Thank you,
You can make another data.frame with the means and ids and use that to draw the line,
df_means <- rowMeans(df)
means <- data.frame(id=1:40, mu=df_means)
ggplot(Molten, aes(x=draw, y=value, colour=variable)) +
geom_line() +
theme(legend.position = "none") +
geom_line(data=means, aes(x=id, y=mu), color="black")
As described here
stat_sum_df <- function(fun, geom="crossbar", ...) {
stat_summary(fun.data=fun, colour="red", geom=geom, width=0.2, ...)
}
k<-ggplot(Molten, aes(x = draw, y = value, colour = variable)) + geom_line() + theme(legend.position = "none")
k+stat_sum_single(mean) #gives you the required plot

How to highlight an item of time-series in a ggplot2 plot

I wish to highlight segments above or below a certain value in a time series by a unique colour or a shape. In the example data I am decomposing a mortality time series into its components. My goal is to highlight the segments when the mortality in the trend component falls below 35 (deep between 1997 and 2000) and when the residual component is above 100 (the spike). I have tried to use annotate, but that did not produce what I wanted.
#Load library and obtain data
library(gamair)
library(tsModel)
library(ggplot2)
library(reshape2)
data<-data(chicago)
## create variables, decompose TS
chicago$date<-seq(from=as.Date("1987-01-01"), to=as.Date("2000-12-31"),length=5114)
data<- chicago[,c("date","death")]
mort <- tsdecomp(data$death, c(1, 2, 15, 5114))
## Convert matrix to df, rename, melt
df<-as.data.frame(mort)
names(df)[1] <- "Trend"
names(df)[2] <- "Seasonal"
names(df)[3] <- "Residual"
df$date<-seq(as.Date("1987-01-01"), as.Date("2000-12-31"), "day")
meltdf <- melt(df,id="date")
## Plot
ggplot(meltdf,aes(x=date,y=value,colour=variable,group=variable)) + geom_line() +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none")
annotate("rect", xmin=1995-01-01,xmax=1996-01-01,ymin= 10, ymax=300, alpha = .2,fill="blue")
Well, this works but I must admit it's more work that I'd hoped.
get.box <- function(data) {
rng <- range(data$date) + c(-50,50)
z <- meltdf[meltdf$date>=rng[1] & meltdf$date <=rng[2] & meltdf$variable==unique(data$variable),]
data.frame(variable=unique(z$variable),
xmin=min(z$date),xmax=max(z$date),ymin=min(z$value),ymax=max(z$value))
}
hilight.trend <- get.box(with(meltdf,meltdf[variable=="Trend" & value<35,]))
hilight.resid <- get.box(with(meltdf,meltdf[variable=="Residual" & value>100,]))
ggplot(meltdf,aes(colour=variable,group=variable)) +
geom_line(aes(x=date,y=value)) +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none") +
geom_rect(data=hilight.trend, alpha=0.2, fill="red",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin)) +
geom_rect(data=hilight.resid, alpha=0.2, fill="blue",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin))
You can't really use annotate(...) with facets, because you will get the same annotation on all the facets. So you're left with something like geom_rect(...). The problem here is that geom_rect(...) draws a rectangle for every row in the data. So you need to create an auxiliary dataset with just one row for each variable, containing the x- and y- min and max.

Grouping labels when x is a factor variable in ggplot2

I'm trying to replace the x-axis labels "A0" and "A1" by one "A" which can be placed in the middle of "A0" and "A1". It would be better if there is a method which works like the following question:
grouping of axis labels ggplot2
By that, I mean to redraw the x-axis only for each group, and leave a blank between groups.
Here is the code I'm working on:
y = 1*round(runif(20)*10,1)
x1 = c("A","B")
x2 = c(0,1)
x = expand.grid(x1,x2)
xy = cbind(x,y)
xy$z = paste(xy$Var1,xy$Var2,sep="")
p <- ggplot(xy, aes(x=factor(z), y=y,fill=factor(Var2)))
p + geom_boxplot() + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))
Try this. No need for the variable z, just use position="dodge":
p <- ggplot(xy, aes(x=factor(Var1), y=y,fill=factor(Var2)))
p + geom_boxplot(position="dodge") + geom_jitter(position=position_jitter(width=.2)) + theme_bw() + xlab("X") + ylab("Y") + scale_fill_discrete(name="Var2",breaks=c(0, 1),labels=c("T", "C"))

Resources