Geom_Hex coloring by 3rd variable - r

I'm trying to create a normal hexbin plot but instead of coloring the plot by count, the default, I would like to have it colored by the average value of a third variable. In my particular case, I cannot use the stat_summary_hex function.
library(ggplot2)
library(hexbin)
x <- rnorm(1e4, 0, 5)
y <- rnorm(1e4, 0, 10)
z <- rnorm(1e4, 20, 1)
data.frame(x, y, z) %>%
ggplot(mapping = aes(x = x, y = y, z = z)) +
geom_hex(bins = 20)

You can use the following code:
library(ggplot2)
library(hexbin)
library(ggraph)
x <- rnorm(1e4, 0, 5)
y <- rnorm(1e4, 0, 10)
z <- rnorm(1e4, 20, 1)
data.frame(x, y, z) %>%
ggplot(aes(x, y, z=z)) +
stat_summary_hex(fun = function(x) mean(x), bins = 20) +
scale_fill_viridis_c(option = "magma")
ggplotly()
Output:

This is tricky. You need to assign the fill value after the stat is computed. You can do this by precalculating the hexbins that the stat is going to produce, and using hexTapply to get the average of z in each cell.
hb <- hexbin(x, y, bins = 30)
data.frame(x, y, z) %>%
ggplot(mapping = aes(x = x, y = y, weight = z)) +
geom_hex(aes(fill = after_stat(hexTapply(hb, z, mean))), bins = 30) +
scale_fill_viridis_c(option = "magma")

Related

How to draw a horizontal line and a vertical line that cross at the intersection with the line of a plot with ggplot2?

I made a plot with points connected with a line. In this plot I want to draw a vertical line from a point on the x-axis to the intersection with the plot line.And at the intersection, I want to draw a horizontal line to the Y-axis. I have searched several websites, forums and tutorials, but I still can't do it. Any help?
library(ggplot2)
X <- 1:5
Y <- c(2, 6, 4, 7, 12)
DF <- data.frame(X, Y)
ggplot(data = DF,
aes(x = X,
y = Y)) +
geom_point() +
geom_line() +
geom_vline(xintercept = 4.5,
linetype = 2)
Result so far:
Example of desired result:
Already commented by #Eric but also sort of answered by #akrun in link, by applyng the apply function to the function of #akrun you can also plot all the points of the DF, something like this would work for you:
library(ggplot2)
X <- 1:5
Y <- c(2, 6, 4, 7, 12)
DF <- data.frame(X, Y)
draw_guides<- function(x, y) {
list(geom_segment(aes(x = -Inf, xend = x, y = y, yend = y), linetype = "dashed"),
geom_segment(aes(x = x, xend = x, y = y, yend = -Inf), linetype = "dashed"))
}
ggplot(data = DF,
aes(x = X,
y = Y)) +
geom_point() +
geom_line() +
apply(DF, 1, function(y) draw_guides(y[1], y[2]))
As already mentioned by #Eric in his comment geom_segment is the way to go to achieve your desired result. Moreover, you have to manually compute the y value where the segments should cut the geom_line which could be achieved using approx. As quick approach may look like so:
library(ggplot2)
X <- 1:5
Y <- c(2, 6, 4, 7, 12)
DF <- data.frame(X, Y)
# vertical line
vsegment <- function(x, X, Y) {
geom_segment(aes(x = x, xend = x, y = -Inf, yend = approx(X, Y, x)$y),
linetype = 2)
}
# horizontal line
hsegment <- function(x, X, Y) {
geom_segment(aes(x = -Inf, xend = x, y = approx(X, Y, x)$y, yend = approx(X, Y, x)$y),
linetype = 2)
}
ggplot(data = DF,
aes(x = X,
y = Y)) +
geom_point() +
geom_line() +
vsegment(4.5, X, Y) +
hsegment(4.5, X, Y)
You can use geom_path() as well for this
X <- 1:5
Y <- c(2, 6, 4, 7, 12)
DF <- data.frame(X, Y)
ggplot(data = DF, aes(x = X, y = Y)) +
geom_point() +
geom_line() +
geom_path(data = data.frame(x = c(-Inf, 4.5, 4.5), y = c(approx(X, Y, 4.5)$y, approx(X, Y, 4.5)$y, -Inf)), aes(x, y), color = "red", linetype = 2)
If you want it more flexible for more intercepts you can use this function.
Note the ... part, so you can pass the geom_path arguments along, like color, linetype, size, etc. It supports an intercept based on the x value or based on the y value.
my_intercept <- function(x, y, ...) {
if (!missing(x)) dt <- data.frame(x = c(-Inf, x, x), y = c(approx(X, Y, x)$y, approx(X, Y, x)$y, -Inf))
if (!missing(y)) dt <- data.frame(x = c(-Inf, approx(Y, X, y)$y, approx(Y, X, y)$y), y = c(y, y, -Inf))
geom_path(data = dt, aes(x, y), ...)
}
ggplot(data = DF, aes(x = X, y = Y)) +
geom_point() +
geom_line() +
my_intercept(x = 4.5, color = "blue", linetype = 2) +
my_intercept(y = 5, color = "red", linetype = 4)

Time series bar graph reordering

I have the following script I'm working on, I want to re order the bar graph in descending order by their values.
library(tidyverse)
library(lubridate)
library(ggplot2)
#df <- read_csv('dataframe.csv')
df %>%
mutate(date=mdy(date), year=year(date), year = year + (date >= mdy(paste0("10/01/", year))))%>%
group_by(year) %>%
summarize(avg = mean(flow)) -> df
y <- df$avg
x <- ymd(sprintf("%d-01-01",df$year))
d <- data.frame(x = x, y = y)
# interpolate values from zero to y and create corresponding number of x values
vals <- lapply(d$y, function(y) seq(0, y, by = 0.1))
y <- unlist(vals)
mid <- rep(d$x, lengths(vals))
d2 <- data.frame(x = mid - 100,
xend = mid + 100,
y = y,
yend = y)
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2)
Results
I want to reorder the bars in descending order by values
The dataset can be found through the following link
https://drive.google.com/file/d/11PVub9avzMFhUz02cHfceGh9DrlVQDbD/view?usp=sharing
the output I'm looking for is like this.
Kindly assist.
To arrange the data you need to adjust the factor levels. You could arrange the data based on avg column and change year to factor.
library(dplyr)
library(ggplot2)
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg) + geom_col(aes(fill = 'red')) + guides(fill=FALSE)
Or :
df %>%
arrange(desc(avg)) %>%
mutate(year = factor(year, unique(year))) %>%
ggplot() + aes(year, avg, fill = avg) + geom_col()
Try this:
library(scales)
#Custom Transform function
dttrans <- function(a, b, breaks = b$breaks, format = b$format) {
a <- as.trans(a)
b <- as.trans(b)
name <- paste(a$name, b$name, sep = "-")
trans <- function(x) a$trans(b$trans(x))
inv <- function(x) b$inverse(a$inverse(x))
trans_new(name, trans, inv, breaks, format = format)
}
ggplot(data = d2, aes(x = x, xend = xend, y = y, yend = yend, color = y)) +
geom_segment(size = 2) +
scale_x_continuous(trans = dttrans("reverse", "date"))
Credits: Mikko Marttila

How to put variables in legend in ggplot2

I want to get the following plot.
So, how would I put a variable i.e. cov(x,y) as string in legend using ggplot?
I would recommend calculating the covariance in a separate data frame, and customizing the color scale using the values in the covariance data frame:
Sample Data
library(dplyr)
library(ggplot2)
set.seed(999)
d <- data.frame(
x = runif(60, 0, 100),
z = rep(c(0, 1), each = 30)
) %>%
mutate(
y = x + 50 * z + rnorm(60, sd = 50),
z = factor(z)
)
Here is the basic plot, with a separate color for each value of z:
ggplot(d, aes(x = x, y = y, color = z)) +
geom_point() +
stat_smooth(method = "lm", se = FALSE)
Now create a smaller data frame that contains covariance values:
cov_df <- d %>%
group_by(z) %>%
summarise(covar = round(cov(x, y)))
Extract the covariance values and store as a character vector:
legend_text <- as.character(pull(cov_df, covar))
Control the color scale to achieve your desired outcome:
ggplot(d, aes(x = x, y = y, color = z)) +
geom_point() +
stat_smooth(method = "lm", se = FALSE) +
scale_color_discrete(
"Covariance",
labels = legend_text
)

Common legend for several geom area ggplot

I created a plot with several geom_area according to the following code :
library(ggplot2)
set.seed(1)
dat <- data.frame(matrix(rnorm(100, 10, 2), 100, 1))
dat_density <- data.frame(density(dat[, 1])[c("x", "y")])
quant <- quantile(dat[, 1], probs = seq(0, 1, 0.10))
library(RColorBrewer)
color_pal <- brewer.pal(length(quant)-1, "RdYlBu")
dens <- ggplot(data = dat_density, aes(x = x, y = y)) +
geom_line(size = 2)
for(i in 1:(length(color_pal))){
dens <- dens +
geom_area(data = subset(dat_density, x > quant[[i]] & x < quant[[i + 1]]), fill = color_pal[i])
}
dens
How can I add a common legend with each color of the color_pal vector (corresponding to all the 10% area of data) ?
The easiest way is to define the groups in your dataset
dat_density$quant <- cut(dat_density$x, breaks = c(-Inf, quant, Inf))
ggplot(data = dat_density, aes(x = x, y = y, fill = quant)) +
geom_line(size = 2) +
geom_area() +
scale_fill_brewer(palette = "RdYlBu")

Continuous colour of geom_line according to y value

If you look at this
ggplot(mtcars,aes(x=disp,y=mpg,colour=mpg))+geom_line()
you will see that the line colour varies according to the corresponding y value, which is what I want, but only section-by-section. I would like the colour to vary continuously according to the y value. Any easy way?
One possibility which comes to mind would be to use interpolation to create more x- and y-values, and thereby make the colours more continuous. I use approx to " linearly interpolate given data points". Here's an example on a simpler data set:
# original data and corresponding plot
df <- data.frame(x = 1:3, y = c(3, 1, 4))
library(ggplot2)
ggplot(data = df, aes(x = x, y = y, colour = y)) +
geom_line(size = 3)
# interpolation to make 'more values' and a smoother colour gradient
vals <- approx(x = df$x, y = df$y)
df2 <- data.frame(x = vals$x, y = vals$y)
ggplot(data = df2, aes(x = x, y = y, colour = y)) +
geom_line(size = 3)
If you wish the gradient to be even smoother, you may use the n argument in approx to adjust the number of points to be created ("interpolation takes place at n equally spaced points spanning the interval [min(x), max(x)]"). With a larger number of values, perhaps geom_point gives a smoother appearance:
vals <- approx(x = df$x, y = df$y, n = 500)
df2 <- data.frame(x = vals$x, y = vals$y)
ggplot(data = df2, aes(x = x, y = y, colour = y)) +
geom_point(size = 3)
Since ggplot2 v0.8.5 one can use geom_line or geom_path with different lineend options (right now there are three options: round, butt and square). Selection depends on the nature of the data.
round would work on sharp edges (like in given OPs data):
library(ggplot2)
ggplot(mtcars, aes(disp, mpg, color = mpg)) +
geom_line(size = 3, lineend = "round")
square would work on a more continuous variable:
df <- data.frame(x = seq(0, 100, 10), y = seq(0, 100, 10) ^ 2)
ggplot(data = df, aes(x = x, y = y, colour = y)) +
geom_path(size = 3, lineend = "square")
Maybe this will work for you:
library(dplyr)
library(ggplot2)
my_mtcars <-
mtcars %>%
mutate(my_colors = cut(disp, breaks = c(0, 130, 200, 400, Inf)))
ggplot(my_mtcars, aes(x = disp, y = mpg, col = mpg)) +
geom_line() + facet_wrap(~ my_colors, scales = 'free_x')

Resources