Manually Set Scale of contour plot using geom_contour_filled - r

I would like manually adjust the scales of two contour plots such that each have the same scale even though they contain different ranges of values in the z-direction.
For instance, lets say that I want to make contour plots of z1 and z2:
x = 1:15
y = 1:15
z1 = x %*% t(y)
z2 = 50+1.5*(x %*% t(y))
data <- data.frame(
x = as.vector(col(z1)),
y = as.vector(row(z1)),
z1 = as.vector(z1),
z2 = as.vector(z2)
)
ggplot(data, aes(x, y, z = z1)) +
geom_contour_filled(bins = 8)
ggplot(data, aes(x, y, z = z2)) +
geom_contour_filled(bins = 8)
Is there a way I can manually adjust the scale of each plot such that each contain the same number of levels (in this case bins = 8), the minimum is the same for both (in this case min(z1)), and the max is the same for both (max(z2))?

One can manually define a vector of desired breaks points and then pass the vector to the "breaks" option in the geom_contour_filled() function.
In the below script, finds 8 break intervals between the grand minimum and the grand maximum of the dataset.
Also there are 2 functions defined to create the palette and label names for the legend.
#establish the min and max of scale
grandmin <- min(z1, z2)-1
grandmax <- max(z2, z2)
#define the number of breaks. In this case 8 +1
mybreaks <- seq(grandmin, ceiling(round(grandmax, 0)), length.out = 9)
#Function to return the dersired number of colors
mycolors<- function(x) {
colors<-colorRampPalette(c("darkblue", "yellow"))( 8 )
colors[1:x]
}
#Function to create labels for legend
breaklabel <- function(x){
labels<- paste0(mybreaks[1:8], "-", mybreaks[2:9])
labels[1:x]
}
ggplot(data, aes(x, y, z = z1)) +
geom_contour_filled(breaks= mybreaks, show.legend = TRUE) +
scale_fill_manual(palette=mycolors, values=breaklabel(8), name="Value", drop=FALSE) +
theme(legend.position = "right")
ggplot(data, aes(x, y, z = z2)) +
geom_contour_filled(breaks= mybreaks, show.legend = TRUE) +
scale_fill_manual(palette=mycolors, values=breaklabel(8), name="Value", drop=FALSE)

Related

Make ggplot with regression line and normal distribution overlay

I am trying to make a plot to show the intuition behind logistic (or probit) regression. How would I make a plot that looks something like this in ggplot?
(Wolf & Best, The Sage Handbook of Regression Analysis and Causal Inference, 2015, p. 155)
Actually, what I would rather even do is have one single normal distribution displayed along the y axis with mean = 0, and a specific variance, so that I can draw horizontal lines going from the linear predictor to the y axis and sideways normal distribution. Something like this:
What this is supposed to show (assuming I haven't misunderstood something) is . I haven't had much success so far...
library(ggplot2)
x <- seq(1, 11, 1)
y <- x*0.5
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)
# Probability density function of a normal logistic distribution
pdfDeltaFun <- function(x) {
prob = (exp(x)/(1 + exp(x))^2)
return(prob)
}
# Tried switching the x and y to be able to turn the
# distribution overlay 90 degrees with coord_flip()
ggplot(df, aes(x = y, y = x)) +
geom_point() +
geom_line() +
stat_function(fun = pdfDeltaFun)+
coord_flip()
I think this comes pretty close to the first illustration you give. If this is a thing you don't need to repeat many times, it is probably best to compute the density curves prior to plotting and use a seperate dataframe to plot these.
library(ggplot2)
x <- seq(1, 11, 1)
y <- x*0.5
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)
# For every row in `df`, compute a rotated normal density centered at `y` and shifted by `x`
curves <- lapply(seq_len(NROW(df)), function(i) {
mu <- df$y[i]
range <- mu + c(-3, 3)
seq <- seq(range[1], range[2], length.out = 100)
data.frame(
x = -1 * dnorm(seq, mean = mu) + df$x[i],
y = seq,
grp = i
)
})
# Combine above densities in one data.frame
curves <- do.call(rbind, curves)
ggplot(df, aes(x, y)) +
geom_point() +
geom_line() +
# The path draws the curve
geom_path(data = curves, aes(group = grp)) +
# The polygon does the shading. We can use `oob_squish()` to set a range.
geom_polygon(data = curves, aes(y = scales::oob_squish(y, c(0, Inf)),group = grp))
The second illustration is pretty close to your code. I simplified your density function by the standard normal density function and added some extra paramters to stat function:
library(ggplot2)
x <- seq(1, 11, 1)
y <- x*0.5
x <- x - mean(x)
y <- y - mean(y)
df <- data.frame(x, y)
ggplot(df, aes(x, y)) +
geom_point() +
geom_line() +
stat_function(fun = dnorm,
aes(x = after_stat(-y * 4 - 5), y = after_stat(x)),
xlim = range(df$y)) +
# We fill with a polygon, squishing the y-range
stat_function(fun = dnorm, geom = "polygon",
aes(x = after_stat(-y * 4 - 5),
y = after_stat(scales::oob_squish(x, c(-Inf, -1)))),
xlim = range(df$y))

Dynamically creating unit labels (K, Mn, Bn, Tn) + adjusting axis limit for dual-axis ggplot2 graph?

I have a date-time dataset with two columns y1 and y2. y2 is always bigger than y1 by an unknown order of magnitude, and both y2 and y1 can have zeroes. When y2 is really big (in the hundreds of billions), reading it's y-axis is tedious (even with commas)- I want to be able to change 1,000 to K, 1,000,000 to M. 10,000,000 to 10M, and so fourth.
Optional/Bonus- I want change the y-axis for either plots to focus/ show the range of y for which data is available. i.e.- if y2 has values that are all above 1,000,000; don't plot a graph starting from zero for this column. Likewise for y1.
Data:
df = data.frame('date' = c(seq(as.Date('2019-01-01'), as.Date('2019-02-01'), 'day')))
df$y1 = runif(nrow(df))
df$y2 = runif(nrow(df)) + 10000000
.
My Current Approach:
Calculating how much to scale y2 by:
#Calculates nearest power of 10
log10_ceiling <- function(x) {
10^(ceiling(log10(x)))
}
scale = df$y2 / df$y1
scale = scale[!is.na(scale)]
scale = scale[!is.infinite(scale)]
scale = log10_ceiling(max(scale))/100
Plotting:
p <- ggplot(df, aes(x = date))
p <- p + geom_line(aes(y = y1, colour = "y1"))
p <- p + geom_line(aes(y = df$y2/scale, colour = "y2"))+
scale_y_continuous(sec.axis = sec_axis(trans = ~.*scale, name = "y2"))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "y1",
x = "date",
colour = "Parameter")
p <- p + theme(legend.position = c(0.8, 0.9))
p
I am aware of label=unit_format(unit = "B"), but don't know how to change it dynamically to M, or handle cases where the scale is 10B instead of 1B.
Edit- a problem with how I'm currently calculating the scale is y2 is always plotted to the next order of magnitude. i.e.- if y2 values fall between 10 and 11 for instance, the window shows y2 from 0 to 100.

Add jitterred points to a ggnetwork plot

I have a graph of vertices and edges which I'd like to plot using a fruchtermanreingold layout.
Here's the graph edges matrix:
edge.mat <- matrix(as.numeric(strsplit("3651,0,0,1,0,0,0,0,2,0,11,2,0,0,0,300,0,1,0,0,66,0,78,9,0,0,0,0,0,0,11690,0,1,0,0,0,0,0,0,0,0,493,1,1,0,4288,5,0,0,36,0,9,7,3,0,6,1,0,1,7,490,0,0,0,6,0,0,628,6,12,0,0,0,0,0,641,0,0,4,0,0,0,0,0,0,66,0,0,0,0,3165,0,281,0,0,0,0,0,0,0,0,45,1,0,0,35248,0,1698,2,0,1,0,2,99,0,0,6,29,286,0,31987,0,1,10,0,8,0,16,0,21,1,0,0,1718,0,51234,0,0,17,3,12,0,0,7,0,0,0,1,0,2,16736,0,0,0,3,0,0,4,630,0,0,0,9,0,0,29495,53,6,0,0,0,0,5,0,0,0,0,3,0,19,186,0,0,0,482,8,12,0,1,0,7,1,0,6,0,26338",
split = ",")[[1]]),
nrow = 14,
dimnames = list(LETTERS[1:14], LETTERS[1:14]))
I then create an igraph object from that using:
gr <- igraph::graph_from_adjacency_matrix(edge.mat, mode="undirected", weighted=T, diag=F)
And then use ggnetwork to convert gr to a data.frame, with specified vertex colors:
set.seed(1)
gr.df <- ggnetwork::ggnetwork(gr,
layout="fruchtermanreingold",
weights="weight",
niter=50000,
arrow.gap=0)
And then I plot it using ggplot2 and ggnetwork:
vertex.colors <- strsplit("#00BE6B,#DC2D00,#F57962,#EE8044,#A6A400,#62B200,#FF6C91,#F77769,#EA8332,#DA8E00,#C59900,#00ACFC,#C49A00,#DC8D00",
split=",")[[1]]
library(ggplot2)
library(ggnetwork)
ggplot(gr.df, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(color = "gray", aes(size = weight)) +
geom_nodes(color = "black")+
geom_nodelabel(aes(label = vertex.names),
color = vertex.colors, fontface = "bold")+
theme_minimal() +
theme(axis.text=element_blank(),
axis.title=element_blank(),
legend.position="none")
In my case each vertex actually represents many points, where each vertex has a different number of points. Adding that information to gr.df:
gr.df$n <- NA
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
What I'd like to do is add to the plot gr.df$n radially jittered points around each vertex (i.e., with its corresponding n), with the same vertex.colors coding. Any idea how to do that?
I think sampling and then plotting with geom_point is a reasonable strategy. (otherwise you could create your own geom).
Here is some rough code, starting from the relevant bit of your question
gr.df$n <- 1
gr.df$n[which(is.na(gr.df$weight))] <- as.integer(runif(length(which(is.na(gr.df$weight))), 100, 500))
# function to sample
# https://stackoverflow.com/questions/5837572/generate-a-random-point-within-a-circle-uniformly
circSamp <- function(x, y, R=0.1){
n <- length(x)
A <- a <- runif(n,0,1)
b <- runif(n,0,1)
ind <- b < a
a[ind] <- b[ind]
b[ind] <- A[ind]
xn = x+b*R*cos(2*pi*a/b)
yn = y+b*R*sin(2*pi*a/b)
cbind(x=xn, y=yn)
}
# sample
d <- with(gr.df, data.frame(vertex.names=rep(vertex.names, n),
circSamp(rep(x,n), rep(y,n))))
# p is your plot
p + geom_point(data=d, aes(x, y, color = vertex.names),
alpha=0.1, inherit.aes = FALSE) +
scale_color_manual(values = vertex.colors)
Giving

Transform color scale to probability-transformed color distribution with scale_fill_gradientn()

I am trying to visualize heavily tailed raster data, and I would like a non-linear mapping of colors to the range of the values. There are a couple of similar questions, but they don't really solve my specific problem (see links below).
library(ggplot2)
library(scales)
set.seed(42)
dat <- data.frame(
x = floor(runif(10000, min=1, max=100)),
y = floor(runif(10000, min=2, max=1000)),
z = rlnorm(10000, 1, 1) )
# colors for the colour scale:
col.pal <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan", "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
fill.colors <- col.pal(64)
This is how the data look like if not transformed in some way:
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors)
My question is sort of a follow-up question related to
this one or this one , and the solution given here actually yields exactly the plot I want, except for the legend:
qn <- rescale(quantile(dat$z, probs=seq(0, 1, length.out=length(fill.colors))))
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, values = qn)
Now I want the colour scale in the legend to represent the non-linear distribution of the values (now only the red part of the scale is visible), i.e. the legend should as well be based on quantiles. Is there a way to accomplish this?
I thought the trans argument within the colour scale might do the trick, as suggested here , but that throws an error, I think because qnorm(pnorm(dat$z)) results in some infinite values (I don't completely understand the function though..).
norm_trans <- function(){
trans_new('norm', function(x) pnorm(x), function(x) qnorm(x))
}
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, trans = 'norm')
> Error in seq.default(from = best$lmin, to = best$lmax, by = best$lstep) : 'from' must be of length 1
So, does anybody know how to have a quantile-based colour distribution in the plot and in the legend?
This code will make manual breaks with a pnorm transformation. Is this what you are after?
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors,
trans = 'norm',
breaks = quantile(dat$z, probs = c(0, 0.25, 1))
)
I believe you have been looking for a quantile transform. Unfortunately there is none in scales, but it is not to hard to build one yourself (on the fly):
make_quantile_trans <- function(x, format = scales::label_number()) {
name <- paste0("quantiles_of_", deparse1(substitute(x)))
xs <- sort(x)
N <- length(xs)
transform <- function(x) findInterval(x, xs)/N # find the last element that is smaller
inverse <- function(q) xs[1+floor(q*(N-1))]
scales::trans_new(
name = name,
transform = transform,
inverse = inverse,
breaks = function(x, n = 5) inverse(scales::extended_breaks()(transform(x), n)),
minor_breaks = function(x, n = 5) inverse(scales::regular_minor_breaks()(transform(x), n)),
format = format,
domain = xs[c(1, N)]
)
}
ggplot(dat, aes(x = x, y = y, fill = z)) +
geom_tile(width=2, height=30) +
scale_fill_gradientn(colours=fill.colors, trans = make_quantile_trans(dat$z))
Created on 2021-11-12 by the reprex package (v2.0.1)

How to plot a heat map with irregular data in ordinates in ggplot?

Suppose I want to plot the following data:
# First set of X coordinates
x <- seq(0, 10, by = 0.2)
# Angles from 0 to 90 degrees
angles <- seq(0, 90, length.out = 10)
# Convert to radian
angles <- deg2rad(angles)
# Create an empty data frame
my.df <- data.frame()
# For each angle, populate the data frame
for (theta in angles) {
y <- sin(x + theta)
tmp <- data.frame(x = x, y = y, theta = as.factor(theta))
my.df <- rbind(my.df, tmp)
}
x1 <- seq(0, 12, by = 0.3)
y1 <- sin(x1 - 0.5)
tmp <- data.frame(x = x1, y = y1, theta = as.factor(-0.5))
my.df <- rbind(my.df, tmp)
ggplot(my.df, aes(x, y, color = theta)) + geom_line()
That gives me a nice plot:
Now I want to draw a heat map out of this data set. There are tutorials here and there that do it using geom_tile to do it.
So, let's try:
# Convert the angle values from factors to numerics
my.df$theta <- as.numeric(levels(my.df$theta))[my.df$theta]
ggplot(my.df, aes(theta, x)) + geom_tile(aes(fill = y)) + scale_fill_gradient(low = "blue", high = "red")
That does not work, and the reason is that my x coordinates do not have the same step:
x <- seq(0, 10, by = 0.2) vs x1 <- seq(0, 12, by = 0.3)
But as soon as I use the same step x1 <- seq(0, 12, by = 0.2), it works:
I real life, my data sets are not regularly spaced (these are experimental data), but I still need to display them as a heat map. How can I do?
You can use akima to interpolate the function into a form suitable for heat map plots.
library(akima)
library(ggplot2)
my.df.interp <- interp(x = my.df$theta, y = my.df$x, z = my.df$y, nx = 30, ny = 30)
my.df.interp.xyz <- as.data.frame(interp2xyz(my.df.interp))
names(my.df.interp.xyz) <- c("theta", "x", "y")
ggplot(my.df.interp.xyz, aes(x = theta, y = x, fill = y)) + geom_tile() +
scale_fill_gradient(low = "blue", high = "red")
If you wish to use a different resolution you can change the nx and ny arguments to interp.
Another way to do it with just ggplot2 is to use stat_summary_2d.
library(ggplot2)
ggplot(my.df, aes(x = theta, y = x, z = y)) + stat_summary_2d(binwidth = 0.3) +
scale_fill_gradient(low = "blue", high = "red")

Resources