scales_fill_continuous doesn't work (ggplot2) - r

I have a data:
df_1 <- data.frame(
x = replicate(
n = 2, expr = rnorm(n = 3000, mean = 100, sd = 10)
),
y = sample(x = 1:3, size = 3000, replace = TRUE)
)
And the follow function:
library(tidyverse)
ggplot(data = df_1, mapping = aes(x = x.1, fill = x.1)) +
geom_histogram(color = 'black', bins = 100) +
scale_fill_continuous(low = 'blue', high = 'red') +
theme_dark()
scale_fill_continuous doesn't work. The graph is black and gray.
Tks.

The problem, I think, is that there are nrow(df_1) values for fill, but only 100 are needed. This could be solved by pre-calculating the bin positions and counts and plotting with geom_col, but a neater solution is to use stat. stat is supposed to be for computed variables (e.g. stat(count) - see ?geom_histogram) but we can give it the vector 1:nbin and it works.
df_1 <- data.frame(
x = replicate(n = 2, expr = rnorm(n = 3000, mean = 100, sd = 10)),
y = sample(x = 1:3, size = 3000, replace = TRUE)
)
library(tidyverse)
nbins <- 100
ggplot(data = df_1, mapping = aes(x = x.1, fill = stat(1:nbins))) +
geom_histogram(bins = nbins) +
scale_fill_continuous(low = "red", high = "blue")
Created on 2020-01-19 by the reprex package (v0.3.0)

The aes fill should be stat(count) rather than x.1
ggplot(data = df_1, mapping = aes(x = x.1, fill = stat(count))) +
geom_histogram(color = 'black', bins = 100) +
scale_fill_continuous(type = "gradient", low = "blue", high = "red") +
theme_dark()

Related

Grid as bars in ggplot

A common layout in many sites is to draw the grid as shaded bars:
I'm doing this with this function:
grid_bars <- function(data, y, n = 5, fill = "gray90") {
breaks <- pretty(data[[y]], n)
len <- length(breaks)-1
all_bars <- data.frame(
b.id = rep(1:len, 4),
b.x = c(rep(-Inf, len), rep(Inf, len*2), rep(-Inf, len)),
b.y = c(rep(breaks[-length(breaks)], 2), rep(breaks[-1], 2))
)
bars <- all_bars[all_bars$b.id %in% (1:len)[c(FALSE, TRUE)], ]
grid <- list(
geom_polygon(data = bars, aes(b.x, b.y, group = b.id),
fill = fill, colour = fill),
scale_y_continuous(breaks = breaks),
theme(panel.grid = element_blank())
)
return(grid)
}
#-------------------------------------------------
dat <- data.frame(year = 1875:1972,
level = as.vector(LakeHuron))
ggplot(dat, aes(year, level)) +
grid_bars(dat, "level", 10) +
geom_line(colour = "steelblue", size = 1.2) +
theme_classic()
But it needs to specify data and y again. How to take those directly from the ggplot?
After having a look at the options for extending ggplot2 in Hadley Wickham's book on ggplot2 you probably have to set up your own Geom or Stat layer to achieve the desired result. This way you can access the data and aesthetics specified in ggplot() or even pass different data and aesthetics to your fun. Still a newbie in writing extensions for ggplot2 but a first approach may look like so:
library(ggplot2)
# Make bars dataframe
make_bars_df <- function(y, n) {
breaks <- pretty(y, n)
len <- length(breaks) - 1
all_bars <- data.frame(
group = rep(1:len, 4),
x = c(rep(-Inf, len), rep(Inf, len * 2), rep(-Inf, len)),
y = c(rep(breaks[-length(breaks)], 2), rep(breaks[-1], 2))
)
all_bars[all_bars$group %in% (1:len)[c(FALSE, TRUE)], ]
}
# Setup Geom
geom_grid_bars_y <- function(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, n = 5, ...) {
layer(
geom = GeomGridBarsY, mapping = mapping, data = data, stat = stat,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(n = n, ...)
)
}
GeomGridBarsY <- ggproto("GeomGridBarsY", Geom,
required_aes = c("y"),
default_aes = aes(alpha = NA, colour = NA, fill = "gray90", group = NA,
linetype = "solid", size = 0.5, subgroup = NA),
non_missing_aes = aes("n"),
setup_data = function(data, params) {
transform(data)
},
draw_group = function(data, panel_scales, coord, n = n) {
bars <- make_bars_df(data[["y"]], n)
# setup data for GeomPolygon
## If you want this to work with facets you have to take care of the PANEL
bars$PANEL <- factor(1)
# Drop x, y, group from data
d <- data[ , setdiff(names(data), c("x", "y", "group"))]
d <- d[!duplicated(d), ]
# Merge information in data to bars
bars <- merge(bars, d, by = "PANEL")
# Set color = fill
bars[["colour"]] <- bars[["fill"]]
# Draw
grid::gList(
ggplot2::GeomPolygon$draw_panel(bars, panel_scales, coord)
)
},
draw_key = draw_key_rect
)
grid_bars <- function(n = 5, fill = "gray90") {
list(
geom_grid_bars_y(n = n, fill = fill),
scale_y_continuous(breaks = scales::pretty_breaks(n = n)),
theme(panel.grid = element_blank())
)
}
dat <- data.frame(year = 1875:1972,
level = as.vector(LakeHuron))
ggplot(dat, aes(year, level)) +
grid_bars(n = 10, fill = "gray95") +
geom_line(colour = "steelblue", size = 1.2) +
theme_classic()
Just for reference:
A first and simple approach to get grid bars one could simply adjust the size of the grid lines via theme() like so:
# Simple approach via theme
ggplot(dat, aes(year, level)) +
geom_line(colour = "steelblue", size = 1.2) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
theme_classic() +
theme(panel.grid.major.y = element_line(size = 8))
Created on 2020-06-14 by the reprex package (v0.3.0)

how to ggplot with upper and lower bound as shaded using facet_wrap in R?

I am trying to automate the process of plotting data using ggplot and the facet_wrap functionality. I want a single y-axis label instead individual plot Ob (i.e., A_Ob, B_ob etc) and also a single X-axis not all the plots having label for x-axis such as below. Below is my sample code using gridextra package. However, i would like to do it through facet_wrap as i have many other plots to draw which i think will save me sometime.
graphics.off()
rm(list = ls())
library(tidyverse)
library(gridExtra)
G1 = data.frame(A_Ob = runif(1000, 5, 50), A_Sim = runif(1000, 3,60), A_upper = runif(1000, 10,70), A_lower = runif(1000, 0, 45 ),
B_Ob = runif(1000, 5, 50), B_Sim = runif(1000, 3,60), B_upper = runif(1000, 10,70), B_lower = runif(1000, 0, 45 ),
C_Ob = runif(1000, 5, 50), C_Sim = runif(1000, 3,60), C_upper = runif(1000, 10,70), C_lower = runif(1000, 0, 45 ),
D_Ob = runif(1000, 5, 50), D_Sim = runif(1000, 3,60), D_upper = runif(1000, 10,70), D_lower = runif(1000, 0, 45 ),
Pos = 1:1000)
A1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = A_Ob), col = "black")+
geom_line(aes(y = A_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = A_upper, ymax = A_lower), fill = "grey70")
B1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = B_Ob), col = "black")+
geom_line(aes(y = B_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = B_upper, ymax = B_lower), fill = "grey70")
C1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = C_Ob), col = "black")+
geom_line(aes(y = C_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = C_upper, ymax = C_lower), fill = "grey70")
D1 = ggplot(data = G1, aes(x = Pos))+
geom_line(aes(y = D_Ob), col = "black")+
geom_line(aes(y = D_Sim), col = "blue")+
geom_vline(xintercept = 750, color = "red", size=1.5)+
geom_ribbon(aes(ymin = D_upper, ymax = D_lower), fill = "grey70")
grid.arrange(A1,B1,C1,D1, nrow = 4)
Here is the result of the code
You need to reshape your dataframe into a longer format and separate values for Ob, Sim, upper and lower.
Using the function melt from data.table package can help you to achieve this:
library(data.table)
setDT(G1)
Ob_cols = grep("_Ob",colnames(G1),value = TRUE)
Sim_cols = grep("_Sim",colnames(G1),value = TRUE)
Upper_cols = grep("_upper",colnames(G1), value = TRUE)
Lower_cols = grep("_lower", colnames(G1), value = TRUE)
g.m <- melt(G1, measure = list(Ob_cols,Sim_cols,Upper_cols,Lower_cols), value.name = c("OBS","SIM","UP","LOW"))
levels(g.m$variable) <- c("A","B","C","D")
Pos variable OBS SIM UP LOW
1: 1 A 5.965488 29.167666 26.66783 29.97259
2: 2 A 23.855719 8.570245 43.75830 30.65616
3: 3 A 16.947887 51.201047 15.20758 39.76122
4: 4 A 49.883306 3.715319 34.38066 20.73177
5: 5 A 5.021938 3.102880 30.05036 32.05123
6: 6 A 19.887176 15.400853 53.67156 28.54982
and now, you can plot it:
library(ggplot2)
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_grid(variable~.)
EDIT: Adding annotations & renaming labels
To rename and replace facet labels, you can re-define levels of variable and use facet_wrap instead of facet_grid using ncol = 1 as argument.
To add multiple annotations on a single panel, you need to define a dataframe that you will use in geom_text.
Altogether, you have to do:
# renaming names of each facets:
levels(g.m$variable) <- c("M1","M2","M3","M4")
# Defining annotations to add:
df_text <- data.frame(label = c("Calibration", "Validation"),
x = c(740,760),
y = c(65,65),
hjust = c(1,0),
variable = factor("M1", levels = c("M1","M2","M3","M4")))
# Plotting
ggplot(g.m, aes(x = Pos))+
geom_line(aes(y = OBS), color = "black")+
geom_line(aes(y = SIM), color = "blue")+
geom_vline(xintercept = 750,color = "red", size = 1.5)+
geom_ribbon(aes(ymin = UP, ymax = LOW), fill = "grey70")+
facet_wrap(variable~., ncol = 1)+
theme(strip.text.x = element_text(hjust = 0),
strip.background = element_rect(fill = "white"))+
geom_text(data = df_text, aes(x = x, y = y, label = label, hjust = hjust), color = "red")
Does it look what you are expecting ?

Generating multiple geom_smooth lines of data samples

Attempting to build a new geom function here that will take a sample of points from a dataset by group, and fit a number of local regressions through the individual subsets. This would generate multiple local regression lines as samples of a full dataset. In the end generating something akin to this:
Though I'm continuing to get errors with the function I've built below (with reprex). Any assistance is appreciated. Thank you!
library(ggplot2)
library(dplyr)
geom_mline <- function(mapping = NULL, data = NULL, stat = "mline",
position = "identity", show.legend = NA,
inherit.aes = TRUE, na.rm = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
geom = geomMline,
mapping = mapping,
data = data,
stat = stat,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...)
)
}
geomMline <- ggproto("geomMline", GeomLine,
required_aes = c("x", "y"),
default_aes = aes(colour = "black", size = 0.5, linetype = 1, alpha = NA)
)
stat_mline <- function(mapping = NULL, data = NULL, geom = "line",
position = "identity", show.legend = NA, inherit.aes = TRUE,
SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
layer(
stat = StatMline,
data = data,
mapping = mapping,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(SPAN=SPAN,
N_size=N_size,
N_LOESS=N_LOESS,
...
)
)
}
StatMline <- ggproto("StatMline", Stat,
required_aes = c("x", "y"),
compute_group = function(self, data, scales, params,
SPAN = .9, N_size = 50, N_LOESS = 50) {
tf <- tempfile(fileext=".png")
png(tf)
plot.new()
colnames(data) <- c("x", "variable", "y")
LOESS_DF <- data.frame(y = seq(min(data$x),
max(data$x),
length.out = 50))
for(i in 1:N_LOESS){
# sample N_size points
df_sample <- sample_n(data, N_size)
# fit a loess
xx <- df_sample$x
yy <- df_sample$y
tp_est <- loess(yy ~ xx , span = SPAN)
# predict accross range of x using loess model
loess_vec <- data.frame(
predict(tp_est, newdata =
data.frame(xx = seq(min(data$x), max(data$x), length.out = 500))))
colnames(loess_vec) <- as.character(i)
# repeat x times
LOESS_DF <- cbind(LOESS_DF,loess_vec)
#str(LOESS_DF)
}
invisible(dev.off())
unlink(tf)
data.frame(reshape2::melt(LOESS_DF, id = "y"))
}
)
# dummy data
library(reshape2)
x <- seq(1,1000,1)
y1 <- rnorm(n = 1000,mean = x*2^1.1, sd = 200)
y2 <- rnorm(n = 1000,mean = x*1, sd = 287.3)
y3 <- rnorm(n = 1000,mean = x*1.1, sd = 100.1)
data <- data.frame(x , y1, y2, y3)
data <- melt(data, id.vars = "x")
str(data)
ggplot(data,aes(x,value,group = variable, color = va
riable))+geom_point()
ggplot(data = data, aes(x = x, y = value, group=variable, color = variable)) +
#geom_point(color="black") +
#geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
#stat_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
geom_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
#data <- subset(data, variable == "y2")
You could use the existing geom_smooth geom and use lapply to generate geom_smooth calls from multiple random samples from the original data frame. For example:
# Fake data
set.seed(2)
dat = data.frame(x = runif(100, 0, 10))
dat$y = 2*dat$x - 0.5*dat$x^2 - 5 + rnorm(100, 0, 5)
ggplot(dat, aes(x, y)) +
geom_point() +
lapply(1:10, function(i) {
geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE)
})
Or, keeping it all in the tidyverse:
library(tidyverse)
ggplot(dat, aes(x, y)) +
geom_point() +
map(1:10, ~geom_smooth(data=dat[sample(1:nrow(dat), 20), ], se=FALSE))
Here's a way to plot the quantiles within ggplot. I'm not sure if it's possible to get stat_quantile to plot a ribbon. To get that, you might have to calculate the quantile regression outside of ggplot and add use geom_ribbon to add the values.
ggplot(dat, aes(x, y)) +
geom_point() +
geom_quantile(quantiles=c(0.1, 0.5, 0.9), formula=y ~ poly(x, 2),
aes(color=factor(..quantile..), size=factor(..quantile..))) +
scale_color_manual(values=c("red","blue","red")) +
scale_size_manual(values=c(1,2,1)) +
labs(colour="Quantile") +
guides(colour=guide_legend(reverse=TRUE), size=FALSE) +
theme_classic()

Making a specific quantile plot in R

I am very intrigued by the following visulization (Decile term)
And I wonder how it would be possible to do it in R.
There is of course histograms and density plots, but they do not make such a nice visualization. Especially, I would like to know if it possible to do it with ggplot/tidyverse.
edit in response to the comment
library(dplyr)
library(ggplot2)
someData <- data_frame(x = rnorm(1000))
ggplot(someData, aes(x = x)) +
geom_histogram()
this produces a histogram (see http://www.r-fiddle.org/#/fiddle?id=LQXazwMY&version=1)
But how I can get the coloful bars? How to implement the small rectangles? (The arrows are less relevant).
You have to define a number of breaks, and use approximate deciles that match those histogram breaks. Otherwise, two deciles will end up in one bar.
d <- data_frame(x = rnorm(1000))
breaks <- seq(min(d$x), max(d$x), length.out = 50)
quantiles <- quantile(d$x, seq(0, 1, 0.1))
quantiles2 <- sapply(quantiles, function(x) breaks[which.min(abs(x - breaks))])
d$bar <- as.numeric(as.character(cut(d$x, breaks, na.omit((breaks + dplyr::lag(breaks)) / 2))))
d$fill <- cut(d$x, quantiles2, na.omit((quantiles2 + dplyr::lag(quantiles2)) / 2))
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1])
Or with more distinct colors:
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1]) +
scale_fill_brewer(type = 'qual', palette = 3) # The only qual pallete with enough colors
Add some styling and increase the breaks to 100:
ggplot(d, aes(bar, y = 1, fill = fill)) +
geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1], size = 0.3) +
scale_fill_brewer(type = 'qual', palette = 3) +
theme_classic() +
coord_fixed(diff(breaks)[1], expand = FALSE) + # makes square blocks
labs(x = 'x', y = 'count')
And here is a function to make that last one:
decile_histogram <- function(data, var, n_breaks = 100) {
breaks <- seq(min(data[[var]]), max(data[[var]]), length.out = n_breaks)
quantiles <- quantile(data[[var]], seq(0, 1, 0.1))
quantiles2 <- sapply(quantiles, function(x) breaks[which.min(abs(x - breaks))])
data$bar <- as.numeric(as.character(
cut(data[[var]], breaks, na.omit((breaks + dplyr::lag(breaks)) / 2)))
)
data$fill <- cut(data[[var]], quantiles2, na.omit((quantiles2 + dplyr::lag(quantiles2)) / 2))
ggplot2::ggplot(data, ggplot2::aes(bar, y = 1, fill = fill)) +
ggplot2::geom_col(position = 'stack', col = 1, show.legend = FALSE, width = diff(breaks)[1], size = 0.3) +
ggplot2::scale_fill_brewer(type = 'qual', palette = 3) +
ggplot2::theme_classic() +
ggplot2::coord_fixed(diff(breaks)[1], expand = FALSE) +
ggplot2::labs(x = 'x', y = 'count')
}
Use as:
d <- data.frame(x = rnorm(1000))
decile_histogram(d, 'x')

position of geom_vline() legend shifts

I have several plots like the one below. My problem is that the legend for geom_vline() (Type) shifts across plots, sometimes appearing above the "Mean" legend, sometimes below.
How can I specify the position of the geom_vline() legend (or the other legend), such that I do not have variation across plots in my paper?
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean = 50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
gp <- ggplot(data = data, aes_string(x = "value"))
gp <- gp + geom_density(aes_string(fill = "Type"), alpha = 0.3)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
gp2 <- gp + geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE)
gp3 <- gp2 + geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05)
gp3
You can pass the order parameter of guide_legend passed to the guide parameter of the scale_* functions for the guides you want to rearrange. For example:
library(ggplot2)
set.seed(1234)
data <- data.frame(value = rnorm(n = 10000, mean =50, sd = 20),
Type = sample(letters[1:2], size = 10000, replace = TRUE))
data$value[data$Type == "b"] <- data$value[data$Type == "b"] +
rnorm(sum(data$Type == "b"), mean = 55)
vlines <- data.frame(value = c(mean(data$value[data$Type == "a"]),
mean(data$value[data$Type == "b"])),
Mean = c("A", "B"))
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 1)) + # fill first
scale_color_discrete(guide = guide_legend(order = 2)) # color second
ggplot(data, aes(x = value)) +
geom_density(aes(fill = Type), alpha = 0.3) +
geom_vline(data = vlines, aes(xintercept = value, colour = Mean),
size = 1.05, linetype = "dashed", show.legend = TRUE) +
geom_vline(xintercept = (50 + 55 + 50) / 2, size = 1.05) +
scale_fill_discrete(guide = guide_legend(order = 2)) + # now fill second
scale_color_discrete(guide = guide_legend(order = 1)) # and color first

Resources