This contour plot, made with the Igor program, is popular in atmospheric chemistry and pollution studies:
I'm trying to recreate it with R for a friend who wants to stop using Igor, and we can't quite get it. Here's the dataset (the same data used to make the plot with Igor), and here's what I've got so far to make the plot with R:
# read in the data
dat <- read.csv("contour_plot_data.csv")
# focus on the untransformed values
dat <- dat[, 1:108]
# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))
# interpolate between the Diameter values for a smoother contour,
# a seperate interpolation for each row (date value)
# this takes a moment or two...
interp <- seq(min(Diameter), max(Diameter), 0.2)
dat_interp <- data.frame(matrix(0, ncol = length(interp), nrow = nrow(dat)))
for(i in 1:nrow(dat)){
# get the values from row i
vec <- unlist(dat[i, 2:108], use.names = FALSE)
# compute loess interpolations
lo <- loess(vec ~ Diameter)
# predict interpolated values
pr <- predict(lo, newdata = data.frame(Diameter = interp))
# store in a data frame
df <- data.frame(ct = unname(pr), Diameter = interp)
# add as new row to new data frame
dat_interp[i, ] <- df$ct
print(i) # so we can see that it's working
}
# add date col and col names to the interpolated data
names(dat_interp) <- interp
dat_interp$date <- as.character(dat$Time)
# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
gather_cols <- interp
dat_long <- gather_(dat_interp, "Diameter", "dN_dlogDp", gather_cols)
# we want diameter as a numeric
dat_long$Diameter <- as.numeric(as.character(dat_long$Diameter))
# we want date as a date format
x <- as.character(dat_long$date)
date_ <- as.Date(x, format = "%d/%m/%Y")
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$date_time <- as.POSIXct(paste0(date_, " ", time_))
# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN", 0, dat_long$dN_dlogDp_log)
# get on with plottong...
library(ggplot2)
library(scales)
labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/4)
ggplot(dat_long, aes(y = Diameter, x = date_time, fill=dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rainbow(7)) +
scale_y_continuous(expand = c(0,0), breaks = labels_breaks ) +
scale_x_datetime(expand = c(0,0), breaks = date_breaks("12 hours")) +
ylab("Diameter (nm)") +
xlab("Date and time") +
mytheme
My plot could do with a little more finessing with labels and tick marks, etc. However, my main question is why my contour fill looks so different from the Igor plot. The scale seems reversed, and the interpolation looks very different.
How can I make my plot look more like the Igor plot?
Note that these other questions of mine are closely related to the task of recreating this plot:
geom_raster interpolation with log scale
2d density plot for categories
And after I asked this question I have been keeping an updated gist of R code that combines details from the answers to these questions, and successfully replicates these plots (example output included in the gist). That gist is here: https://gist.github.com/benmarwick/9a54cbd325149a8ff405.
UPDATE I've now made a package that will produce these plots: https://github.com/benmarwick/smps
I can get a lot closer to the Igor plot using akima::interp instead of loess for the interpolation:
# read in the data
dat <- read.csv("contour_plot_data.csv")
# focus on the untransformed values
dat <- dat[, 1:108]
# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))
# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
dat_long <- gather(dat, "Diameter", "dN_dlogDp", 2:108)
# we want diameter as a numeric
dat_long$Diameter <- as.numeric(gsub("X", "", dat_long$Diameter ))
# we want time as a date-formatted variable
x <- as.character(dat_long$Time)
date_ <- as.Date(x, format = "%d/%m/%Y")
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$Time <- as.POSIXct(paste0(date_, " ", time_))
# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN" |
dat_long$dN_dlogDp_log == "-Inf" , 0, dat_long$dN_dlogDp_log)
# interpolate between the values for a smoother contour
# this takes a moment or two...
library(akima)
xo <- with(dat_long, seq(min(Time), max(Time), 120))
yo <- with(dat_long, seq(min(Diameter), max(Diameter), 0.5))
dat_interp <- with(dat_long, interp(Time, Diameter, dN_dlogDp_log, xo = xo, yo = yo) )
# get on with plotting...
# make into a data frame for ggplot
dat_interp_df <- data.frame(matrix(data = dat_interp$z, ncol = length(dat_interp$y), nrow = length(dat_interp$x)))
names(dat_interp_df) <- dat_interp$y
dat_interp_df$Time <- as.POSIXct(dat_interp$x, origin = "1970-01-01")
# wide to long
dat_interp_df_long <- gather(dat_interp_df, "Diameter", "dN_dlogDp_log", 1:(ncol(dat_interp_df)-1))
dat_interp_df_long$Diameter <- as.numeric(as.character(dat_interp_df_long$Diameter))
# plot
library(ggplot2)
library(scales)
y_labels_breaks <- seq(0, max(Diameter), 100)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(rainbow(50))) +
scale_y_continuous(expand = c(0,0), breaks = y_labels_breaks ) +
scale_x_datetime(expand = c(0,0), breaks = date_breaks("1 day"))
But there is still quite a big difference in the colour mapping, with the Igor plot having wide bands with sharp boundaries, and my plot has fewer colour bands and fuzzy boundaries between them. So I guess I don't quite have the interpolation method that the Igor plot uses.
UPDATE after experimenting with a bunch of colour ramps, I've found a pretty good match in colorRamps::blue2green2red. I've also put a bit of effort here into fancy tickmarks:
# plot
library(ggplot2)
library(scales) # for date_breaks
library(colorRamps) # for blue2green2red
# function for minor tick marks
every_nth <- function(x, nth, empty = TRUE, inverse = FALSE)
{
if (!inverse) {
if(empty) {
x[1:nth == 1] <- ""
x
} else {
x[1:nth != 1]
}
} else {
if(empty) {
x[1:nth != 1] <- ""
x
} else {
x[1:nth == 1]
}
}
}
# add tick marks every two hours
start_date <- min(dat_interp_df_long$Time)
end_date <- max(dat_interp_df_long$Time)
date_breaks_2h <- seq(from = start_date, to = end_date, by = "2 hours")
date_breaks_1_day <- seq(from = start_date, to = end_date, by = "1 day")
multiple <- length(date_breaks_2h) / length(date_breaks_1_day)
insert_minor <- function(major_labs, n_minor) {labs <-
c( sapply( major_labs, function(x) c(x, rep("", multiple) ) ) )
labs[1:(length(labs)-n_minor)]}
y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(100)) +
scale_y_continuous(expand = c(0,0),
labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
breaks = y_labels_breaks) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
The green-blue gradient is still a bit different from the Igor plot. I have very little green at all! Perhaps further experimentation with colour ramps might improve the match there.
To get the y-axis on a log scale, some additional effort is required. We have to use geom_rect and adjust the sizes of each rectangle to fit in the log scale:
################## y-axis with log scale ###########################
# get visually diminishing axis ticks
base_breaks <- function(n = 10){
function(x) {
axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
}
}
# Now with log axis, we need to replace the ymin and ymax
distance <- diff((unique(dat_interp_df_long$Diameter)))/2
upper <- (unique(dat_interp_df_long$Diameter)) + c(distance, distance[length(distance)])
lower <- (unique(dat_interp_df_long$Diameter)) - c(distance[1], distance)
# Create xmin, xmax, ymin, ymax
dat_interp_df_long$xmin <- dat_interp_df_long$Time - 1000 # default of geom_raster is 0.5
dat_interp_df_long$xmax <- dat_interp_df_long$Time + 1000
idx <- rle(dat_interp_df_long$Diameter)$lengths[1]
dat_interp_df_long$ymin <- unlist(lapply(lower, function(i) rep(i, idx)))
dat_interp_df_long$ymax <- unlist(lapply(upper, function(i) rep(i, idx)))
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,
xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
fill = dN_dlogDp_log)) +
geom_rect() +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(1000)) +
scale_y_continuous(expand = c(0,0),
trans = log_trans(), breaks = base_breaks()) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
UPDATE After some experimentation with colour ramps, I've found a pretty close match:
# adjust the colour ramp to match the Igor plot (their colour ramp is pretty uneven! lots of red and blue, it seems.)
colfunc <- colorRampPalette(c( rep("red", 3),
rep("yellow", 1),
rep("green", 2),
"cyan",
rep("blue", 3),
"purple"))
y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) +
geom_raster(interpolate = TRUE) +
scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(colfunc(100))) +
scale_y_continuous(expand = c(0,0),
labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
breaks = y_labels_breaks) +
scale_x_datetime(expand = c(0,0),
breaks=date_breaks_2h,
labels=insert_minor(format(date_breaks_1_day, "%d %b"),
length(date_breaks_1_day))) +
xlab("Day and time") +
ylab("Diameter (nm)") +
mytheme
The code from this post is also at https://gist.github.com/benmarwick/9a54cbd325149a8ff405
UPDATE I've now made a package that will produce these plots: https://github.com/benmarwick/smps
Related
I am trying to make a figure using this excel Data herelike this , but I am unable to put two variables on x-axis with different values.
library(xlsx2)
test <- read.xlsx2("E:/Plot/oz.xlsx",1, header=TRUE)
test$Ozone = as.numeric(as.character(test$Ozone))
test$Altitude = as.numeric(as.character(test$Altitude))
test$Pressure = as.numeric(as.character(test$Pressure))
round(test$Altitude) # i want to round the values of Altitude
library(ggplot2)
ylim.prim <- c(0, 34) # in this example, precipitation
ylim.sec <- c(1010, 10)
b <- diff(ylim.prim)/diff(ylim.sec)
a <- ylim.prim[1] - b*ylim.sec[1]
ggplot(test, aes(x = Ozone, y = Altitude,color = 'Ozone Partial Pressure (mPa)'))+
geom_path(aes(x = Ozone))+ scale_y_continuous(name="Altitude (km)",sec.axis=sec_axis(~(.- a)/b, name = 'Pressure (hPa)'))+
scale_x_continuous(name="Temperature (km)",sec.axis=sec_axis(~(.), name = 'Temperature (C)'))+
theme_test() + theme(legend.position = c(0.01, 0.14),legend.justification = c(0, -4))
but I get a plot like this .
I would appreciate any help at this stage. Thank you
If you want two labels per break, you can seperate them with \n. You can do this manually, or if you know the transformation with a function provided to the labels argument of the scale. From the example figure I glanced that temperature is 4x - 100 wherein x is the ozon label. However, temperature and ozone don't appear to be colinear from your data.
library(ggplot2)
# Downloaded from google sheets as tsv
file <- file.choose()
df <- read.table(file, sep = "\t", header = TRUE)
# Per example figure
ozone2temp <- function(x){x * 4 - 100}
# Simplified for brevity
ggplot(df) +
geom_path(aes(Ozone, Altitude, colour = "Ozone")) +
scale_x_continuous(
labels = function(x) {
paste(x, ozone2temp(x), sep = "\n")
},
name = "Ozone\nTemp"
)
Created on 2021-08-10 by the reprex package (v1.0.0)
EDIT:
If you want to also plot altitude with temperature as the x-variable, you need the reverse transformation as well:
library(ggplot2)
# Downloaded from google sheets as tsv
file <- file.choose()
df <- read.table(file, sep = "\t", header = TRUE)
# Per example figure
ozone2temp <- function(x){x * 4 - 100}
temp2ozone <- function(x){(x + 100) / 4}
ggplot(df, aes(y = Altitude)) +
geom_path(aes(Ozone, colour = "Ozone")) +
geom_path(aes(temp2ozone(Temperature),
colour = "Temperature")) +
scale_x_continuous(
labels = function(x) {
paste(x, ozone2temp(x), sep = "\n")
},
name = "Ozone\nTemp"
)
Created on 2021-08-10 by the reprex package (v1.0.0)
I was trying to recreate this plot:
using the following code -
library(tidyverse)
set.seed(0); r <- rnorm(10000);
df <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- seq(from = avg - 3*SD, to = avg + 3*SD, by = SD)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
df %>% ggplot(aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = "")
Using the code I plotted this:
,
but this isn't near to the plot that I am trying to create. How do I make an additional axis with the X axis? How do I add the lines to automatically show the percentage of observations? Is there any way, that I can create the plot as nearly identical as possible using ggplot2?
Welcome to SO. Excellent first question!
It's actually quite tricky. You'd need to create a second plot (the second x axis) but it's not the most straight forward to align both perfectly.
I will be using Z.lin's amazing modification of the cowplot package.
I am not using the reprex package, because I think I'd need to define every single function (and I don't know how to use trace within reprex.)
library(tidyverse)
library(cowplot)
set.seed(0); r <- rnorm(10000);
foodf <- as.data.frame(r)
avg <- round(mean(r),2)
SD <- round(sd(r),2)
x.scale <- round(seq(from = avg - 3*SD, to = avg + 3*SD, by = SD), 1)
x.lab <- c("-3SD", "-2SD", "-1SD", "Mean", "1SD", "2SD", "3SD")
x2lab <- -3:3
# calculate the density manually
dens_r <- density(r)
# for each x value, calculate the closest x value in the density object and get the respective y values
y_dens <- dens_r$y[sapply(x.scale, function(x) which.min(abs(dens_r$x - x)))]
# added annotation for segments and labels.
# Arrow segments can be added in a similar way.
p1 <-
ggplot(foodf, aes(r)) +
geom_histogram(aes(y=..density..), bins = 20,
colour="black", fill="lightblue") +
geom_density(alpha=.2, fill="darkblue") +
scale_x_continuous(breaks = x.scale, labels = x.lab) +
labs(x = NULL) +# use NULL here
annotate(geom = "segment", x = x.scale, xend = x.scale,
yend = 1.1 * max(dens_r$y), y = y_dens, lty = 2 ) +
annotate(geom = "text", label = x.lab,
x = x.scale, y = 1.2 * max(dens_r$y))
p2 <-
ggplot(foodf, aes(r)) +
scale_x_continuous(breaks = x.scale, labels = x2lab) +
labs(x = NULL) +
theme_classic() +
theme(axis.line.y = element_blank())
# This is with the modified plot_grid() / align_plot() function!!!
plot_grid(p1, p2, ncol = 1, align = "v", rel_heights = c(1, 0.1))
I am trying to plot some data, fitted values and forecasts on a nice ggplot format but when I plot my data the way I think should work I get a gap between the real data and the forecast. The gap is meaningless but it would be nice if it was gone.
Some R code you can use to recreate my problem is:
library(xts)
library(tidyverse)
library(forecast)
dates <- seq(as.Date("2016-01-01"), length = 100, by = "days")
realdata <- arima.sim(model = list(ar = 0.7, order = c(1,1,0)), n = 99)
data <- xts(realdata, order.by = dates)
user_arima <- arima(data, order = c(1,1,0))
user_arimaf <- forecast(user_arima)
fits <- xts(user_arimaf$fitted, order.by = dates)
fcastdates <- as.Date(dates[100]) + 1:10
meancast <- xts(user_arimaf$mean[1:10], order.by = fcastdates)
lowercast95 <- xts(user_arimaf$lower[1:10], order.by = fcastdates)
uppercast95 <- xts(user_arimaf$upper[1:10], order.by = fcastdates)
frame <- merge(data, fits, meancast, uppercast95, lowercast95, all = TRUE, fill = NA)
frame <- as.data.frame(frame) %>%
mutate(date = as.Date(dates[1] + 0:(109)))
frame %>%
ggplot() +
geom_line(aes(date, data, color = "Data")) +
geom_line(aes(date, fits, color = "Fitted")) +
geom_line(aes(date, meancast, color = "Forecast")) +
geom_ribbon(aes(date, ymin=lowercast95,ymax=uppercast95),alpha=.25) +
scale_color_manual(values = c(
'Data' = 'black',
'Fitted' = 'red',
'Forecast' = 'darkblue')) +
labs(color = 'Legend') +
theme_classic() +
ylab("some data") +
xlab("Date") +
labs(title = "chart showing a gap",
subtitle = "Shaded area is the 95% CI from the ARIMA")
And the chart is below
I know there is a geom_forecast in ggplot now but I would like to build this particular plot the way i'm doing it. Although if there's no other solution to the gap then i'll use the geom_forecast.
Closing the gap requires providing a data point in the meancast column for the blank area. I guess it makes sense just to use the value for the last "real" data point.
# Grab the y-value corresponding to the date just before the gap.
last_data_value = frame[frame$date == as.Date("2016-04-09"), "data"]
# Construct a one-row data.frame.
extra_row = data.frame(data=NA_real_,
fits=NA_real_,
meancast=last_data_value,
uppercast95=last_data_value,
lowercast95=last_data_value,
date=as.Date("2016-04-09"))
# Add extra row to the main data.frame.
frame = rbind(frame, extra_row)
Forword: I provide a reasonably satisfactory answer to my own question. I understand this is acceptable practice. Naturally my hope is to invite suggestions and improvements.
My purpose is to plot two time series (stored in a dataframe with dates stored as class 'Date') and to fill the area between the data points with two different colors according to whether one is above the other. For instance, to plot an index of Bonds and an index of Stocks, and to fill the area in red when the Stock index is above the bond index, and to fill the area in blue otherwise.
I have used ggplot2 for this purpose, because I am reasonably familiar with the package (author: Hadley Wickham), but feel free to suggest other approaches. I wrote a custom function based on the geom_ribbon() function of the ggplot2 package. Early on I faced problems related to my lack of experience in handling the geom_ribbon() function and objects of class 'Date'. The function below represents my effort to solve these problems, almost surely it is roundabout, unecessarily complicated, clumsy, etc.. So my question is: Please suggest improvements and/or alternative approaches. Ultimately, it would be great to have a general-purpose function made available here.
Data:
set.seed(123456789)
df <- data.frame(
Date = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))
library('reshape2')
df <- melt(df, id.vars = 'Date')
Custom Function:
## Function to plot geom_ribbon for class Date
geom_ribbon_date <- function(data, group, N = 1000) {
# convert column of class Date to numeric
x_Date <- as.numeric(data[, which(sapply(data, class) == "Date")])
# append numeric date to dataframe
data$Date.numeric <- x_Date
# ensure fill grid is as fine as data grid
N <- max(N, length(x_Date))
# generate a grid for fill
seq_x_Date <- seq(min(x_Date), max(x_Date), length.out = N)
# ensure the grouping variable is a factor
group <- factor(group)
# create a dataframe of min and max
area <- Map(function(z) {
d <- data[group == z,];
approxfun(d$Date.numeric, d$value)(seq_x_Date);
}, levels(group))
# create a categorical variable for the max
maxcat <- apply(do.call('cbind', area), 1, which.max)
# output a dataframe with x, ymin, ymax, is. max 'dummy', and group
df <- data.frame(x = seq_x_Date,
ymin = do.call('pmin', area),
ymax = do.call('pmax', area),
is.max = levels(group)[maxcat],
group = cumsum(c(1, diff(maxcat) != 0))
)
# convert back numeric dates to column of class Date
df$x <- as.Date(df$x, origin = "1970-01-01")
# create and return the geom_ribbon
gr <- geom_ribbon(data = df, aes(x, ymin = ymin, ymax = ymax, fill = is.max, group = group), inherit.aes = FALSE)
return(gr)
}
Usage:
ggplot(data = df, aes(x = Date, y = value, group = variable, colour = variable)) +
geom_ribbon_date(data = df, group = df$variable) +
theme_bw() +
xlab(NULL) +
ylab(NULL) +
ggtitle("Bonds Versus Stocks (Fake Data!)") +
scale_fill_manual('is.max', breaks = c('Stocks', 'Bonds'),
values = c('darkblue','darkred')) +
theme(legend.position = 'right', legend.direction = 'vertical') +
theme(legend.title = element_blank()) +
theme(legend.key = element_blank())
Result:
While there are related questions and answers on stackoverflow, I haven't found one that was sufficiently detailed for my purpose. Here is a selection of useful exchanges:
create-geom-ribbon-for-min-max-range: Asks a similar question, but provides less detail than I was looking for.
possible-bug-in-geom-ribbon: Closely related, but intermediate steps on how to compute max/min are missing.
fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot: Closely related, but focuses on loess lines. Excellent.
ggplot-colouring-areas-between-density-lines-according-to-relative-position : Closely related, but focuses on densities. This post greatly inspired me.
Perhaps I'm not understanding your full problem but it seems that a fairly direct approach would be to define a third line as the minimum of the two time series at each time point. geom_ribbon is then called twice (once for each unique value of Asset) to plot the ribbons formed by each of the series and the minimum line. Code could look like:
set.seed(123456789)
df <- data.frame(
Date = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))
library(reshape2)
library(ggplot2)
df <- cbind(df,min_line=pmin(df[,2],df[,3]) )
df <- melt(df, id.vars=c("Date","min_line"), variable.name="Assets", value.name="Prices")
sp <- ggplot(data=df, aes(x=Date, fill=Assets))
sp <- sp + geom_ribbon(aes(ymax=Prices, ymin=min_line))
sp <- sp + scale_fill_manual(values=c(Stocks="darkred", Bonds="darkblue"))
sp <- sp + ggtitle("Bonds Versus Stocks (Fake Data!)")
plot(sp)
This produces following chart:
I actually had the same question some time ago and here is the related post. It defines a function finding the intersections between two lines and an other function which takes a dataframe in input and then colors the space between the two columns using matplotand polygon
EDIT
Here is the code, modified a bit to allow the last polygon to be plotted
set.seed(123456789)
dat <- data.frame(
Date = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))
intersects <- function(x1, x2) {
seg1 <- which(!!diff(x1 > x2)) # location of first point in crossing segments
above <- x2[seg1] > x1[seg1] # which curve is above prior to crossing
slope1 <- x1[seg1+1] - x1[seg1]
slope2 <- x2[seg1+1] - x2[seg1]
x <- seg1 + ((x2[seg1] - x1[seg1]) / (slope1 - slope2))
y <- x1[seg1] + slope1*(x - seg1)
data.frame(x=x, y=y, pindex=seg1, pabove=(1:2)[above+1L])
# pabove is greater curve prior to crossing
}
fillColor <- function(data, addLines=TRUE) {
## Find points of intersections
ints <- intersects(data[,2], data[,3]) # because the first column is for Dates
intervals <- findInterval(1:nrow(data), c(0, ints$x))
## Make plot
matplot(data, type="n", col=2:3, lty=1, lwd=4,xaxt='n',xlab='Date')
axis(1,at=seq(1,dim(data)[1],length.out=12),
labels=data[,1][seq(1,dim(data)[1],length.out=12)])
legend("topright", c(colnames(data)[2], colnames(data)[3]), col=3:2, lty=1, lwd=2)
## Draw the polygons
for (i in seq_along(table(intervals))) {
xstart <- ifelse(i == 1, 0, ints$x[i-1])
ystart <- ifelse(i == 1, data[1,2], ints$y[i-1])
xend <- ints$x[i]
yend <- ints$y[i]
x <- seq(nrow(data))[intervals == i]
polygon(c(xstart, x, xend, rev(x)), c(ystart, data[x,2], yend, rev(data[x,3])),
col=ints$pabove[i]%%2+2)
}
# add end of plot
xstart <- ints[dim(ints)[1],1]
ystart <- ints[dim(ints)[1],2]
xend <- nrow(data)
yend <- data[dim(data)[1],2]
x <- seq(nrow(data))[intervals == max(intervals)]
polygon(c(xstart, x, xend, rev(x)), c(ystart, data[x,2], yend, rev(data[x,3])),
col=ints[dim(ints)[1]-1,4]%%2+2)
## Add lines for curves
if (addLines)
invisible(lapply(1:2, function(x) lines(seq(nrow(data)), data[,x], col=x%%2+2, lwd=2)))
}
## Plot the data
fillColor(dat,FALSE)
and the final result is this (with the same data used for the question)
#walts answer should remain the winner but while implementing his solution, I gave it a tidy update.
library(tidyverse)
set.seed(2345)
# fake data
raw_data <-
tibble(
date = as.Date("2020-01-01") + (1:40),
a = 95 + cumsum(runif(40, min = -20, max = 20)),
b = 55 + cumsum(runif(40, min = -1, max = 1))
)
# the steps
# the 'y' + 'min_line' + 'group' is the right granularity (by date) to
# create 2 separate ribbons
df <-
raw_data %>%
# find min of the two columns
mutate(min_line = pmin(a, b)) %>%
pivot_longer(c(a, b), names_to = "group", values_to = "y") %>%
print()
# the result
ggplot(data = df, aes(x = date, fill = group)) +
geom_ribbon(aes(ymax = y, ymin = min_line)) +
theme_classic()
another option using ggh4x - requires the data to be wide with y for lines 1 and 2 in different columns.
library(ggh4x)
#> Loading required package: ggplot2
set.seed(123456789)
df <- data.frame(
Date = seq.Date(as.Date("1950-01-01"), by = "1 month", length.out = 12*10),
Stocks = 100 + c(0, cumsum(runif(12*10-1, -30, 30))),
Bonds = 100 + c(0, cumsum(runif(12*10-1, -5, 5))))
## The data frame is NOT made long!!
ggplot(data = df, aes(x = Date)) +
stat_difference(aes(ymin = Stocks, ymax = Bonds)) +
scale_fill_brewer(palette = "Set1")
Created on 2022-11-24 with reprex v2.0.2
In the "graphics" package one can add a second x-axis (indicating the percentiles of the distribution) to a histogram as follows:
x <- rnorm(1000)
hist(x, main="", xlab="Bias")
perc <- quantile(x, seq(from=.00, to=1, by=.1))
axis(1,at=perc,labels=c("0","10%","20%","30%","40%","50%","60%","70%","80%","90%","100%"),cex=0.5, pos= -90)
That looks awkward, of course. So how can I modify the following ggplot2 code to add a second x-axis, shwing the percentiles, while the first x-axis should indicate the raw values?:
library(ggplot2)
theme_classic(base_size = 12, base_family = "")
x <- rnorm(1000)
qplot(x, main="", xlab="Bias")
perc <- quantile(x, seq(from=.00, to=1, by=.1))
Any help? Many thanks in advance!
I'm not entirely certain what you're after, since your first example doesn't actually produce what you describe.
But in terms of simply adding the percentage along with the raw value along the x axis, the easiest strategy would probably be to simply combine the two with a line break in a single set of labels:
dat <- data.frame(x = rnorm(1000))
perc <- quantile(dat$x,seq(from = 0,to = 1,by = 0.1))
l <- paste(round(perc,1),names(perc),sep = "\n")
> ggplot(dat,aes(x = x)) +
geom_histogram() +
scale_x_continuous(breaks = perc,labels = l)
Here's another approach which uses annotate(...) and does not require that the two scales have the same breaks.
library(ggplot2)
library(grid)
set.seed(123)
x <- rnorm(1000)
perc <- quantile(x, seq(from=.00, to=1, by=.1))
labs <- gsub("\\%","",names(perc)) # strip "%" from names
yval <- hist(x,breaks=30,plot=F)$count
yrng <- diff(range(yval))
g1 <- ggplot() +
geom_histogram(aes(x=x))+
xlim(range(x))+
coord_cartesian(ylim=c(0,1.1*max(yval)))+
labs(x="")+
annotate(geom = "text", x = perc, y = -0.1*yrng, label = labs, size=4) +
annotate(geom = "text", x=0, y=-0.16*yrng, label="Bias", size=4.5)+
theme(plot.margin = unit(c(1, 1, 2, 1), "lines"))
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid.draw(g2)
This adds the second x-axis and the label using annotate(...). The last three lines of code turn off clipping of the viewport. Otherwise the annotations aren't visible.
Credit to #Henrik for his answer to this question.