Shading every other vertical month, alternating white and gray - r

I have boxplots of data with one set of boxplots for each month. But together, they get busy and it isn't clear which box goes with which date. Is there a way to shade every other vertical month light gray so I can easily see which ones go with which month?
Edit: I'm already using geom_polygon for another part of the plot that I commented out for now.
date <- seq(as.Date('2015-09-15'), as.Date('2016-09-30'), by = "2 days")
x <- rnorm(length(date))
date <- date[order(x)]
Date <- format(as.Date(date), "%Y-%m")
values <- rnorm(length(x),.16,.01)
type <- c(rep("a",63),rep("b",64),rep("c",64))
new.table <- as.data.frame.matrix(table(Date,type))
dataset <- data.frame(values,date,type,Date)
if(length(which(levels(factor(type))=="c"))==0){
count.data <- rep(0,length(levels(factor(Date))))
}else{count.data <- new.table[,names(new.table)=="c"]}
ly <- length(count.data)
max.count <- max(count.data)
max.right <- max.count*4
max.box <- max(dataset$values,na.rm=T)
min.box <- 5/4*min(dataset$values,na.rm=T)-max.box/4
box.25 <- (max.box-min.box)/4
x <- c(0:(ly+1),c((ly+1):0))
y <- c(0,count.data,rep(0,(ly+3)))*(box.25/max.count)+min.box
poly.data <- data.frame(x,y)
dates1 <- levels(factor(dataset$Date))
noB <- length(dates1)
new.table$Date <- rownames(new.table)
library(tidyverse)
library(gridExtra)
library(ggthemes)
library(ggplot2)
p <- ggplot(dataset,aes(x=Date,y=values,fill=type))+
geom_boxplot(position=position_dodge(width = 0.7))+
stat_boxplot(geom="errorbar",width=0.7)+
coord_cartesian(ylim = c(min.box,max.box))+
#geom_polygon(data=poly.data,mapping = aes(x=x,y=y),fill="grey30")+
#scale_y_continuous(sec.axis = sec_axis(~(.-min.box)*max.count/box.25, name = "Sec axis"),breaks = scales::pretty_breaks(n = 10))+
labs(title="Boxplot of values Over Time",y="values",x="Date (year-month)")+
theme_classic(base_size=15)+
theme(axis.text.x = element_text(angle = ifelse(noB>15,45,0), hjust=ifelse(noB>15,1,0.5)),panel.grid.major=element_line("light grey"))
p

I would use geom_rect with a separate data.frame (here: shades)
shades <- data.frame(xmin=seq(1.5,length(unique(dataset$Date))-1.5, 2),
xmax=seq(2.5,length(unique(dataset$Date))+.5, 2),
ymin=0, ymax=Inf)
ggplot(dataset,aes(x=Date,y=values,fill=type))+
geom_boxplot(position=position_dodge(width = 0.7))+
stat_boxplot(geom="errorbar",width=0.7)+
coord_cartesian(ylim = c(min.box,max.box))+
#geom_polygon(data=poly.data,mapping = aes(x=x,y=y),fill="grey30")+
#scale_y_continuous(sec.axis = sec_axis(~(.-min.box)*max.count/box.25, name = "Sec axis"),breaks = scales::pretty_breaks(n = 10))+
labs(title="Boxplot of values Over Time",y="values",x="Date (year-month)")+
theme_classic(base_size=15)+
theme(axis.text.x = element_text(angle = ifelse(noB>15,45,0), hjust=ifelse(noB>15,1,0.5)),panel.grid.major=element_line("light grey")) +
geom_rect(inherit.aes = F, data = shades, mapping = aes(xmin=xmin, xmax=xmax, ymin = ymin, ymax = ymax), alpha = 0.2)

Related

Add an additional X axis to the plot and some lines/annotations to show the percentage of data under it

I was trying to recreate this plot:
using the following code -
library(tidyverse)
set.seed(0); r <- rnorm(10000);
df <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
df %>% ggplot(aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
set.seed(0); r <- rnorm(10000);
foodf <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
x2lab <- -3:3
# calculate the density manually
dens_r <- density(r)
# for each x value, calculate the closest x value in the density object and get the respective y values
y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
p1 <-
ggplot(foodf, aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = NULL) +# use NULL here
annotate(geom = "segment", x = x.scale, xend = x.scale,
yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) +
annotate(geom = "text", label = x.lab,
x = x.scale, y = 1.2 * max(dens_r$y))
p2 <-
ggplot(foodf, aes(r)) +
scale_x_continuous(breaks = x.scale, labels = x2lab) +
labs(x = NULL) +
theme_classic() +
theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))

How to find clusters of values over threshold for timeseries

I have timeseries and need to find clusters of values over threshold and plot that cluster on separate plot.
My code example. Unfortunately I don't know how to generate well clustered values.
#generate sample data
Sys.setlocale("LC_ALL","English")
set.seed(8)
Values <- sample(0:100,24241, replace = T)
Values <- rpois(24241, lambda=60)
start <- as.POSIXct("2012-01-15 06:10:00")
interval <- 15
end <- start + as.difftime(4, units="days") + as.difftime(5, units = "hours")
DateTimes <- seq(from=start, by=interval, to=end)
my_data_sample <- tibble(datetime = DateTimes, Value = Values)
threshold <- 82
ggplot(data = my_data_sample, aes(x = datetime, y = Value)) +
geom_line(size = 1, color = "darkgreen") +
geom_hline(yintercept=threshold, linetype="dashed", color = "red") +
theme_bw() +
labs(
x= "" ,
y = "",
title = paste("Threshold:", threshold )
) +
scale_x_datetime(date_breaks = "8 hour", labels = date_format("%b %d - %H:%M")) +
theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
Here is what I need:
I need to find clusters of values over threshold - consecutive or near each other, sort that clusters using cluster length in seconds (longest clusters) or sum of values (most powerful clusters), and plot let's say top 3 of that time periods on separate plots.
Any suggestions how to do that?
You can find runs that follow some expectation using run-length encoding (RLE). At the RLE level, you can filter out runs that are too short on either side. You can play with the run_threshold value until it matches your data.
# Put some actual deviating runs in the data
my_data_sample$Value[5001:5100] <- rpois(100, lambda = 80)
my_data_sample$Value[10001:11000] <- rpois(1000, lambda = 80)
threshold <- 82
rle <- rle(my_data_sample$Value > threshold)
# Find sub-threshold values in between super-threshold values,
# convert these to other class
run_threshold <- 20
rle$values[!rle$values & rle$lengths < run_threshold] <- TRUE
# Restructure rle
rle <- rle(inverse.rle(rle))
# Find short super-threshold values to filter
run_threshold <- 5
rle$values[rle$values & rle$lengths < run_threshold] <- FALSE
rle <- rle(inverse.rle(rle))
# Find run starts and ends
rle_start <- {rle_end <- cumsum(rle$lengths)} - rle$lengths + 1
# Format as data.frame for ggplot
rle_df <- data.frame(
min = my_data_sample$datetime[rle_start],
max = my_data_sample$datetime[rle_end],
value = rle$values
)
ggplot(data = my_data_sample, aes(x = datetime, y = Value)) +
geom_line(size = 1, color = "darkgreen") +
geom_rect(aes(xmin = min, xmax = max, ymin = 0, ymax = 10, fill = value),
data = rle_df, inherit.aes = FALSE) +
geom_hline(yintercept=threshold, linetype="dashed", color = "red") +
theme_bw() +
labs(
x= "" ,
y = "",
title = paste("Threshold:", threshold )
) +
scale_x_datetime(date_breaks = "8 hour", labels = date_format("%b %d - %H:%M")) +
theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))

how do i combine multiple data sources in ggplot using split and sapply?

this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
SITENAME_plot <- function(AllDates_TPAF){
ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
labs(x = "Month", y = "Total PAF (% affected)") +
scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
flname <- file.path(dir, name)
flname <- paste0(flname, ".jpg")
png(filename = flname, width = 1500, height = 1000)
print(g)
dev.off()
flname
}
Apply function to list split by SITENAME:
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
dev.off()
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
df1 <- read_excel('Sample data.xlsx',1)
#Create var
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
#Other data
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
#Split 1
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
myplot <- function(x,y,z)
{
#Merge x and y
#Check for duplicates and avoid column
y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
y$SITENAME <- NULL
xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T)
#Format to dates
xy$DATE <- as.Date(xy$DATE)
#Scale factor
scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height)
#Rename for consistency in names
names(z)[4] <- 'DATE'
#Format date
z$DATE <- as.Date(z$DATE)
#Plot
#Plot
G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
geom_line(aes(x=DATE,y=Height*scaleFactor))+
scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
labs(x = "Month") +
geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(xy$SITENAME))
return(G)
}
#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.

Residual plot with ggplot with X-axis as "ranked" residuals

I'm trying to re-create a plot like this in ggplot:.
This graph takes the residuals from a regression output, and plots them in order (with the X-axis being a rank of residuals).
My best attempt at this was something like the following:
library(ggplot2)
library(modelr)
d <- d %>% add_residuals(mod1, var = "resid")
d$resid_rank <- rank(d$resid)
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_bar(stat="identity") +
theme_bw()
However, this yields a completely blank graph. I tried something like this:
ggplot(data = d, aes(x = resid_rank, y = resid)) +
geom_segment(yend = 0, aes(xend=resid)) +
theme_bw()
But this yields the segments that go in the wrong direction. What is the right way to do this, and to color those lines by a third factor?
FAKE DATASET:
library(estimatr)
library(fabricatr)
#simulation
dat <- fabricate(
N = 10000,
y = runif(N, 0, 10),
x = runif(N, 0, 100)
)
#add an outlier
dat <- rbind(dat, c(300, 5))
dat <- rbind(dat, c(500, 3))
dat$y_log <- log(dat$y)
dat$x_log <- log(dat$x)
dat$y_log_s <- scale(log(dat$y))
dat$x_log_s <- scale(log(dat$x))
mod1 <- lm(y_log ~ x_log, data = dat))
I used the build in dataset from the help page on lm() to create this example. I also just directly used resid() to get the residuals. It's unclear where / why the colored bars would be different, but basically you'd need to add a column to your data.frame that specificies why they are red or blue, then pass that to fill.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 3.4.4
#example from lm
ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14)
trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69)
group <- gl(2, 10, 20, labels = c("Ctl","Trt"))
weight <- c(ctl, trt)
lm.D9 <- lm(weight ~ group)
resids <- data.frame(resid = resid(lm.D9))
#why are some bars red and some blue? No clue - so I'll pick randomly
resids$group <- sample(c("group 1", "group 2"), nrow(resids), replace = TRUE)
#rank
resids$rank <- rank(-1 * resids$resid)
ggplot(resids, aes(rank, resid, fill = group)) +
geom_bar(stat = "identity", width = 1) +
geom_hline(yintercept = c(-1,1), colour = "darkgray", linetype = 2) +
geom_hline(yintercept = c(-2,2), colour = "lightgray", linetype = 1) +
theme_bw() +
theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("group 1" = "red", "group 2" = "blue"))
Created on 2019-01-24 by the reprex package (v0.2.1)

Use R to recreate contour plot made in Igor

This contour plot, made with the Igor program, is popular in atmospheric chemistry and pollution studies:
I'm trying to recreate it with R for a friend who wants to stop using Igor, and we can't quite get it. Here's the dataset (the same data used to make the plot with Igor), and here's what I've got so far to make the plot with R:
# read in the data
dat <- read.csv("contour_plot_data.csv")
# focus on the untransformed values
dat <- dat[, 1:108]
# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))
# interpolate between the Diameter values for a smoother contour,
# a seperate interpolation for each row (date value)
# this takes a moment or two...
interp <- seq(min(Diameter), max(Diameter), 0.2)
dat_interp <- data.frame(matrix(0, ncol = length(interp), nrow = nrow(dat)))
for(i in 1:nrow(dat)){
# get the values from row i
vec <- unlist(dat[i, 2:108], use.names = FALSE)
# compute loess interpolations
lo <- loess(vec ~ Diameter)
# predict interpolated values
pr <- predict(lo, newdata = data.frame(Diameter = interp))
# store in a data frame
df <- data.frame(ct = unname(pr), Diameter = interp)
# add as new row to new data frame
dat_interp[i, ] <- df$ct
print(i) # so we can see that it's working
}
# add date col and col names to the interpolated data
names(dat_interp) <- interp
dat_interp$date <- as.character(dat$Time)
# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
gather_cols <- interp
dat_long <- gather_(dat_interp, "Diameter", "dN_dlogDp", gather_cols)
# we want diameter as a numeric
dat_long$Diameter <- as.numeric(as.character(dat_long$Diameter))
# we want date as a date format
x <- as.character(dat_long$date)
date_ <- as.Date(x, format = "%d/%m/%Y")
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$date_time <- as.POSIXct(paste0(date_, " ", time_))
# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN", 0, dat_long$dN_dlogDp_log)
# get on with plottong...
library(ggplot2)
library(scales)
labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/4)
ggplot(dat_long, aes(y = Diameter, x = date_time, fill=dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rainbow(7)) +
scale_y_continuous(expand = c(0,0), breaks = labels_breaks ) +
scale_x_datetime(expand = c(0,0), breaks = date_breaks("12 hours")) +
ylab("Diameter (nm)") +
xlab("Date and time") +
mytheme
My plot could do with a little more finessing with labels and tick marks, etc. However, my main question is why my contour fill looks so different from the Igor plot. The scale seems reversed, and the interpolation looks very different.
How can I make my plot look more like the Igor plot?
Note that these other questions of mine are closely related to the task of recreating this plot:
geom_raster interpolation with log scale
2d density plot for categories
And after I asked this question I have been keeping an updated gist of R code that combines details from the answers to these questions, and successfully replicates these plots (example output included in the gist). That gist is here: https://gist.github.com/benmarwick/9a54cbd325149a8ff405.
UPDATE I've now made a package that will produce these plots: https://github.com/benmarwick/smps
I can get a lot closer to the Igor plot using akima::interp instead of loess for the interpolation:
# read in the data
dat <- read.csv("contour_plot_data.csv")
# focus on the untransformed values
dat <- dat[, 1:108]
# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))
# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
dat_long <- gather(dat, "Diameter", "dN_dlogDp", 2:108)
# we want diameter as a numeric
dat_long$Diameter <- as.numeric(gsub("X", "", dat_long$Diameter ))
# we want time as a date-formatted variable
x <- as.character(dat_long$Time)
date_ <- as.Date(x, format = "%d/%m/%Y")
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$Time <- as.POSIXct(paste0(date_, " ", time_))
# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN" |
dat_long$dN_dlogDp_log == "-Inf" , 0, dat_long$dN_dlogDp_log)
# interpolate between the values for a smoother contour
# this takes a moment or two...
library(akima)
xo <- with(dat_long, seq(min(Time), max(Time), 120))
yo <- with(dat_long, seq(min(Diameter), max(Diameter), 0.5))
dat_interp <- with(dat_long, interp(Time, Diameter, dN_dlogDp_log, xo = xo, yo = yo) )
# get on with plotting...
# make into a data frame for ggplot
dat_interp_df <- data.frame(matrix(data = dat_interp$z, ncol = length(dat_interp$y), nrow = length(dat_interp$x)))
names(dat_interp_df) <- dat_interp$y
dat_interp_df$Time <- as.POSIXct(dat_interp$x, origin = "1970-01-01")
# wide to long
dat_interp_df_long <- gather(dat_interp_df, "Diameter", "dN_dlogDp_log", 1:(ncol(dat_interp_df)-1))
dat_interp_df_long$Diameter <- as.numeric(as.character(dat_interp_df_long$Diameter))
# plot
library(ggplot2)
library(scales)
y_labels_breaks <- seq(0, max(Diameter), 100)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(rainbow(50))) +
scale_y_continuous(expand = c(0,0), breaks = y_labels_breaks ) +
scale_x_datetime(expand = c(0,0), breaks = date_breaks("1 day"))
But there is still quite a big difference in the colour mapping, with the Igor plot having wide bands with sharp boundaries, and my plot has fewer colour bands and fuzzy boundaries between them. So I guess I don't quite have the interpolation method that the Igor plot uses.
UPDATE after experimenting with a bunch of colour ramps, I've found a pretty good match in colorRamps::blue2green2red. I've also put a bit of effort here into fancy tickmarks:
# plot
library(ggplot2)
library(scales) # for date_breaks
library(colorRamps) # for blue2green2red
# function for minor tick marks
every_nth <- function(x, nth, empty = TRUE, inverse = FALSE)
{
if (!inverse) {
if(empty) {
x[1:nth == 1] <- ""
x
} else {
x[1:nth != 1]
}
} else {
if(empty) {
x[1:nth != 1] <- ""
x
} else {
x[1:nth == 1]
}
}
}
# add tick marks every two hours
start_date <- min(dat_interp_df_long$Time)
end_date <- max(dat_interp_df_long$Time)
date_breaks_2h <- seq(from = start_date, to = end_date, by = "2 hours")
date_breaks_1_day <- seq(from = start_date, to = end_date, by = "1 day")
multiple <- length(date_breaks_2h) / length(date_breaks_1_day)
insert_minor <- function(major_labs, n_minor) {labs <-
c( sapply( major_labs, function(x) c(x, rep("", multiple) ) ) )
labs[1:(length(labs)-n_minor)]}
y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(100)) +
scale_y_continuous(expand = c(0,0),
labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
breaks = y_labels_breaks) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
The green-blue gradient is still a bit different from the Igor plot. I have very little green at all! Perhaps further experimentation with colour ramps might improve the match there.
To get the y-axis on a log scale, some additional effort is required. We have to use geom_rect and adjust the sizes of each rectangle to fit in the log scale:
################## y-axis with log scale ###########################
# get visually diminishing axis ticks
base_breaks <- function(n = 10){
function(x) {
axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
}
}
# Now with log axis, we need to replace the ymin and ymax
distance <- diff((unique(dat_interp_df_long$Diameter)))/2
upper <- (unique(dat_interp_df_long$Diameter)) + c(distance, distance[length(distance)])
lower <- (unique(dat_interp_df_long$Diameter)) - c(distance[1], distance)
# Create xmin, xmax, ymin, ymax
dat_interp_df_long$xmin <- dat_interp_df_long$Time - 1000 # default of geom_raster is 0.5
dat_interp_df_long$xmax <- dat_interp_df_long$Time + 1000
idx <- rle(dat_interp_df_long$Diameter)$lengths[1]
dat_interp_df_long$ymin <- unlist(lapply(lower, function(i) rep(i, idx)))
dat_interp_df_long$ymax <- unlist(lapply(upper, function(i) rep(i, idx)))
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,
xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
fill = dN_dlogDp_log)) +
geom_rect() +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(1000)) +
scale_y_continuous(expand = c(0,0),
trans = log_trans(), breaks = base_breaks()) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
UPDATE After some experimentation with colour ramps, I've found a pretty close match:
# adjust the colour ramp to match the Igor plot (their colour ramp is pretty uneven! lots of red and blue, it seems.)
colfunc <- colorRampPalette(c( rep("red", 3),
rep("yellow", 1),
rep("green", 2),
"cyan",
rep("blue", 3),
"purple"))
y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(colfunc(100))) +
scale_y_continuous(expand = c(0,0),
labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
breaks = y_labels_breaks) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
The code from this post is also at https://gist.github.com/benmarwick/9a54cbd325149a8ff405
UPDATE I've now made a package that will produce these plots: https://github.com/benmarwick/smps

Resources