Adding values from one data set to the ggplots (with several variables) - r

By using this function, I can add outliers values into the plot of mpg
outlier_values. <- lapply(mtcars[-c(8,9)], function(x){outlier_values <- boxplot.stats(x)$out})
boxplot(mtcars$mpg, main="Pressure Height", boxwex=0.1)
mtext(paste("Outliers: ", paste(outlier_values., collapse=", ")), cex=0.6)
Buy now I want to add the outlier values (outlier1) to the plot of all variables:
library(reshape2)
library(ggplot2)
outlier <- do.call("cbind", lapply(mtcars[-c(8,9)], function(x) boxplot.stats(x)$out))
outlier1 <- melt(outlier)
mtcars_m = melt(mtcars[,-c(8,9)])
names(mtcars_m)=c("X2","CI")
box.plot<- ggplot(mtcars_m, aes(X2, CI,fill=Models)) +
geom_boxplot(width = 0.1) +
facet_wrap(~ Models, scales = "free") +
guides(fill=FALSE) +
labs(x="", y="") +
ggtitle("Box Plots")
How can I do that?

Your code contains some variables which are undefined (Models). I assume you meant X2. Here is the ggplot2 solution:
outlier1 <- melt(data.frame(outlier))
colnames(mtcars_m) <- colnames(outlier1) <- c("X2","CI")
mtcars_m$Outlier <- FALSE
outlier1$Outlier <- TRUE
ggData <- rbind(mtcars_m, outlier1)
ggplot(ggData, aes(x=X2, y=CI, fill=X2) ) +
geom_boxplot() +
geom_point(aes(colour=Outlier)) +
labs(x="",y="") +
ggtitle("Box Plots") +
guides(fill=FALSE) +
facet_wrap(~ X2, scales = "free")

Related

facet plots with different yaxis scales

Plot with multiple facets which have different scales. Simple example:
require(data.table)
require(ggplot2)
nr <- 10000
inp.dt <- rbind(
data.table(type="A", month=sample(seq(as.Date("2011/1/1"), as.Date("2012/1/1"), by="month"), nr, replace=T)),
data.table(type="B", month=sample(seq(as.Date("2011/1/1"), as.Date("2012/1/1"), by="month"), 100*nr, replace=T))
)
plot.dt <- inp.dt[, .(count=.N), .(type,month)]
mnth <- sort(unique(plot.dt[["month"]]))
plot.dt[, ":="(type=factor(type), month=factor(month, label=format(mnth, format="%Y-%b"), ordered=TRUE))]
g <- ggplot(plot.dt, aes(x=month, y=count)) +
geom_bar(stat="identity") + expand_limits(y=0) + facet_grid(type~., scales="free_y")
print(g)
If I remove scales= the top facet becomes uninteresting. Is there a way of showing this information as facets (not on separate pages), while still conveying the vast difference in scales. Eg, how can I set the ymax for the top facet only to a higher number?
I'm not sure what you want the scales set to, so I just picked some numbers arbitrarily.
require(data.table)
require(ggplot2)
nr <- 10000
inp.dt <- rbind(
data.table(type="A", month=sample(seq(as.Date("2011/1/1"), as.Date("2012/1/1"), by="month"), nr, replace=T)),
data.table(type="B", month=sample(seq(as.Date("2011/1/1"), as.Date("2012/1/1"), by="month"), 100*nr, replace=T))
)
plot.dt <- inp.dt[, .(count=.N), .(type,month)]
mnth <- sort(unique(plot.dt[["month"]]))
plot.dt[, ":="(type=factor(type), month=factor(month, label=format(mnth, format="%Y-%b"), ordered=TRUE))]
# g <- ggplot(plot.dt, aes(x=month, y=count)) +
# geom_bar(stat="identity") + expand_limits(y=0) + facet_grid(type~., scales="free_y")
# print(g)
g1 <- ggplot(plot.dt[plot.dt$type=="A",], aes(x=month, y=count)) + scale_y_continuous(limits=c(0,1500))+
geom_bar(stat="identity") + expand_limits(y=0) #+ facet_grid(type~., scales="free_y")
print(g1)
g2 <- ggplot(plot.dt[plot.dt$type=="B",], aes(x=month, y=count)) + scale_y_continuous(limits=c(0,800000))+
geom_bar(stat="identity") + expand_limits(y=0) #+ facet_grid(type~., scales="free_y")
print(g2)
install.packages("gridExtra")
library(gridExtra)
gA <- ggplotGrob(g1)
gB <- ggplotGrob(g2)
p <- arrangeGrob(
gA, gB, nrow = 2, heights = c(0.80, 0.80))
plot(p)

How do I change the number of decimal places on axis labels in ggplot2?

Specifically, this is in a facet_grid. Have googled extensively for similar questions but not clear on the syntax or where it goes. What I want is for every number on the y-axes to have two digits after the decimal, even if the trailing one is 0. Is this a parameter in scale_y_continuous or element_text or...?
row1 <- ggplot(sector_data[sector_data$sector %in% pages[[x]],], aes(date,price)) + geom_line() +
geom_hline(yintercept=0,size=0.3,color="gray50") +
facet_grid( ~ sector) +
scale_x_date( breaks='1 year', minor_breaks = '1 month') +
scale_y_continuous( labels = ???) +
theme(panel.grid.major.x = element_line(size=1.5),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_text(size=8),
axis.ticks=element_blank()
)
From the help for ?scale_y_continuous, the argument 'labels' can be a function:
labels One of:
NULL for no labels
waiver() for the default labels computed by the transformation object
A character vector giving labels (must be same length as breaks)
A function that takes the breaks as input and returns labels as output
We will use the last option, a function that takes breaks as an argument and returns a number with 2 decimal places.
#Our transformation function
scaleFUN <- function(x) sprintf("%.2f", x)
#Plot
library(ggplot2)
p <- ggplot(mpg, aes(displ, cty)) + geom_point()
p <- p + facet_grid(. ~ cyl)
p + scale_y_continuous(labels=scaleFUN)
The "scales" package has some nice functions for formatting the axes. One of these functions is number_format(). So you don't have to define your function first.
library(ggplot2)
# building on Pierre's answer
p <- ggplot(mpg, aes(displ, cty)) + geom_point()
p <- p + facet_grid(. ~ cyl)
# here comes the difference
p + scale_y_continuous(
labels = scales::number_format(accuracy = 0.01))
# the function offers some other nice possibilities, such as controlling your decimal
# mark, here ',' instead of '.'
p + scale_y_continuous(
labels = scales::number_format(accuracy = 0.01,
decimal.mark = ','))
The scales package has been updated, and number_format() has been retired. Use label_number(). This can also be applied to percentages and other continuous scales (ex: label_percent(); https://scales.r-lib.org/reference/label_percent.html).
#updating Rtists answer with latest syntax from scales
library(ggplot2); library(scales)
p <- ggplot(mpg, aes(displ, cty)) + geom_point()
p <- p + facet_grid(. ~ cyl)
# number_format() is retired; use label_number() instead
p + scale_y_continuous(
labels = label_number(accuracy = 0.01)
)
# for whole numbers use accuracy = 1
p + scale_y_continuous(
labels = label_number(accuracy = 1)
)
Several people have suggested the scales package, but you could just do pretty much the same with base R as well, here by using the format() function.
require(ggplot2)
ggplot(iris, aes(y = Sepal.Length, x = Sepal.Width)) +
geom_point() +
scale_y_continuous(labels = function(x) format(x, nsmall = 2)) +
facet_wrap(~Species)

Merging two plots in ggplot2 and keeping individual features of the plots (geom_text, geom_vline)

I have two geom_line time series with some geom_text & geom_points on each plot.
One of the two plots additionally has geom_vline. I wonder if it is possible to merge two but i failed to get a solution.
Here are the two plots:
require(zoo)
require(ggplot2)
set.seed(10)
# plot 1:
tmp1 <- xts(cumsum(rnorm(5000,1,10)), Sys.Date()-5000:1)
data.tmp1 = data.frame(date=as.Date(index(tmp1)),
value=drop(coredata(tmp1)))
data.tmp1.year.end = data.frame(date=as.Date(index(tmp1[endpoints(tmp1, "years", 1)])),
value= drop(coredata(tmp1[endpoints(tmp1, "years", 1)])))
plot1 =
ggplot(data.tmp1, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp1.year.end, col="red") +
geom_text(data=data.tmp1.year.end, label=data.tmp1.year.end$value, vjust=0, hjust=1)
# plot 2:
tmp2 <- xts(cumsum(rnorm(5000,1,100)), Sys.Date()-5000:1)
data.tmp2 = data.frame(date=as.Date(index(tmp2)),
value=drop(coredata(tmp2)))
data.tmp2.year.end = data.frame(date=as.Date(index(tmp2[endpoints(tmp2, "years", 1)])),
value= drop(coredata(tmp2[endpoints(tmp2, "years", 1)])))
tmp2.date =as.Date(c("2008-01-01"))
plot2 =
ggplot(data.tmp2, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp2.year.end, col="red") +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
geom_text(data=data.tmp2.year.end, label=data.tmp2.year.end$value, vjust=0, hjust=1)
The goal now is that plot1 and plot2 share one xaxis and all features of the individual graphs are kept in the corresponding plot.
The result should look like this:
You might try combining your daily data sets and your year end data sets into single data frames and then using ggplot's faceting to display on a single date axis. Code could look like:
data.tmp1 <- cbind(data.tmp1, data_name="tmp1")
data.tmp1.year.end <- cbind(data.tmp1.year.end, data_name="tmp1")
data.tmp2 <- cbind(data.tmp2, data_name="tmp2")
data.tmp2.year.end <- cbind(data.tmp2.year.end, data_name="tmp2")
data.tmp <- rbind(data.tmp1,data.tmp2)
data.tmp.year.end <- rbind(data.tmp1.year.end, data.tmp2.year.end)
ggplot(data.tmp, aes(x=date, y=value)) +
geom_line(aes(y=value), size=1) +
geom_point(data=data.tmp.year.end, col="red") +
geom_text(data=data.tmp.year.end, aes(label=data.tmp.year.end$value), vjust=0, hjust=1) +
geom_vline(xintercept=as.numeric(tmp2.date), linetype="dotted") +
facet_grid( data_name ~ . , scales="free_y")
which gives the chart

How to highlight an item of time-series in a ggplot2 plot

I wish to highlight segments above or below a certain value in a time series by a unique colour or a shape. In the example data I am decomposing a mortality time series into its components. My goal is to highlight the segments when the mortality in the trend component falls below 35 (deep between 1997 and 2000) and when the residual component is above 100 (the spike). I have tried to use annotate, but that did not produce what I wanted.
#Load library and obtain data
library(gamair)
library(tsModel)
library(ggplot2)
library(reshape2)
data<-data(chicago)
## create variables, decompose TS
chicago$date<-seq(from=as.Date("1987-01-01"), to=as.Date("2000-12-31"),length=5114)
data<- chicago[,c("date","death")]
mort <- tsdecomp(data$death, c(1, 2, 15, 5114))
## Convert matrix to df, rename, melt
df<-as.data.frame(mort)
names(df)[1] <- "Trend"
names(df)[2] <- "Seasonal"
names(df)[3] <- "Residual"
df$date<-seq(as.Date("1987-01-01"), as.Date("2000-12-31"), "day")
meltdf <- melt(df,id="date")
## Plot
ggplot(meltdf,aes(x=date,y=value,colour=variable,group=variable)) + geom_line() +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none")
annotate("rect", xmin=1995-01-01,xmax=1996-01-01,ymin= 10, ymax=300, alpha = .2,fill="blue")
Well, this works but I must admit it's more work that I'd hoped.
get.box <- function(data) {
rng <- range(data$date) + c(-50,50)
z <- meltdf[meltdf$date>=rng[1] & meltdf$date <=rng[2] & meltdf$variable==unique(data$variable),]
data.frame(variable=unique(z$variable),
xmin=min(z$date),xmax=max(z$date),ymin=min(z$value),ymax=max(z$value))
}
hilight.trend <- get.box(with(meltdf,meltdf[variable=="Trend" & value<35,]))
hilight.resid <- get.box(with(meltdf,meltdf[variable=="Residual" & value>100,]))
ggplot(meltdf,aes(colour=variable,group=variable)) +
geom_line(aes(x=date,y=value)) +
theme_bw() +
ylab("") + xlab("") +
facet_grid(variable ~ . , scales = "free") +
theme(legend.position = "none") +
geom_rect(data=hilight.trend, alpha=0.2, fill="red",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin)) +
geom_rect(data=hilight.resid, alpha=0.2, fill="blue",
aes(xmax=xmax,xmin=xmin,ymax=ymax,ymin=ymin))
You can't really use annotate(...) with facets, because you will get the same annotation on all the facets. So you're left with something like geom_rect(...). The problem here is that geom_rect(...) draws a rectangle for every row in the data. So you need to create an auxiliary dataset with just one row for each variable, containing the x- and y- min and max.

Varying factor order in each facet of ggplot2

I am trying to create a Cleveland Dot Plot given for two categories in this case J and K. The problem is the elements A,B,C are in both categories so R keeps farting. I have made a simple example:
x <- c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17])
type <- c(rep("J",10),rep("K",10))
y <- rnorm(n=20,10,2)
data <- data.frame(x,y,type)
data
data$type <- as.factor(data$type)
nameorder <- data$x[order(data$type,data$y)]
data$x <- factor(data$x,levels=nameorder)
ggplot(data, aes(x=y, y=x)) +
geom_segment(aes(yend=x), xend=0, colour="grey50") +
geom_point(size=3, aes(colour=type)) +
scale_colour_brewer(palette="Set1", limits=c("J","K"), guide=FALSE) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
Ideally, I would want a dot plot for both categories(J,K) individually with each factor(vector x) decreasing with respect to the y vector. What ends up happening is that both categories aren't going from biggest to smallest and are erratic at the end instead. Please help!
Unfortunately factors can only have one set of levels. The only way i've found to do this is actually to create two separate data.frames from your data and re-level the factor in each. For example
data <- data.frame(
x = c(LETTERS[1:10],LETTERS[1:3],LETTERS[11:17]),
y = rnorm(n=20,10,2),
type= c(rep("J",10),rep("K",10))
)
data$type <- as.factor(data$type)
J<-subset(data, type=="J")
J$x <- reorder(J$x, J$y, max)
K<-subset(data, type=="K")
K$x <- reorder(K$x, K$y, max)
Now we can plot them with
ggplot(mapping = aes(x=y, y=x, xend=0, yend=x)) +
geom_segment(data=J, colour="grey50") +
geom_point(data=J, size=3, aes(colour=type)) +
geom_segment(data=K, colour="grey50") +
geom_point(data=K, size=3, aes(colour=type)) +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(type ~ ., scales="free_y", space="free_y")
which results in

Resources