ggplot legend - how to change the colour spread? - r

I would like to change how the colours are spread along the legend bar, as shown in the JPEG below. In example 1, I'd like to display the "100" threshold value in the middle of the legend bar. In example 2, I'd like to display the mean value (white colour) in the middle of the legend bar. I suspect both would require a similar line of code. Could someone very kindly help me with this?
# Libraries
library(raster)
library(ggplot2)
# rescale() and muted() are scales functions; library(ggplot2) does not attach them
library(scales)

# Sample raster: cells hold consecutive integers, then the first row is set to 5000
r <- raster(ncol = 36, nrow = 18)
r[] <- (-ncell(r) / 2 + 1):(ncell(r) / 2)
r[1, ] <- 5000
plot(r)
var_df <- as.data.frame(rasterToPoints(r))

### example 1
# Four-colour gradient with stops at the min, 100, 100.01 and the max of `layer`
p <- ggplot()
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal()
p <- p + scale_fill_gradientn(
  colours = c("red", "yellow", "skyblue", "darkblue"),
  values = rescale(c(
    min(var_df$layer),
    100,
    100.01,
    max(var_df$layer)
  ))
)
p

### example 2
# Diverging gradient whose white midpoint is the mean of `layer`
meanval <- mean(var_df$layer)
p <- ggplot()
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal()
p <- p + scale_fill_gradient2(
  low = muted("red"), mid = "white",
  high = muted("blue"), midpoint = meanval
)
p

Related

Draw a box around the legend graphic in ggplot?

A similar sounding question is asked here. However, in the linked question, they put a bounding box around the legend and legend title. I was wondering if it's possible to put a bounding box around the legend graphic. For example,
library(ggplot2)

# Dummy data: every combination of 20 letters and 20 "varN" labels,
# each given a random value between 0 and 5
x <- LETTERS[seq_len(20)]
y <- paste0("var", seq_len(20))
data <- expand.grid(X = x, Y = y)
data$Z <- runif(n = nrow(data), min = 0, max = 5)

# Heatmap with a white-to-red fill
ggplot(data = data, mapping = aes(x = X, y = Y, fill = Z)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red")
The above code creates this plot:
Whereas, I am trying to create something like this:
I tried playing with legend.background function from ggplot but I can't get it to work for me.
Any suggestions as to how I would do this?
I don't think you can do this with theme() - you can do it with guides() though, e.g.
library(ggplot2)

# Dummy data: every combination of 20 letters and 20 "varN" labels,
# each given a random value between 0 and 5
x <- LETTERS[seq_len(20)]
y <- paste0("var", seq_len(20))
data <- expand.grid(X = x, Y = y)
data$Z <- runif(n = nrow(data), min = 0, max = 5)

# Heatmap; the black frame is requested on the colourbar guide itself
ggplot(data = data, mapping = aes(x = X, y = Y, fill = Z)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  guides(
    fill = guide_colorbar(frame.colour = "black", frame.linewidth = 1.5)
  )
Edit per comment
Looking at the source code for new_scale() there should be a way to apply this solution to two of the same scales, but I can't figure it out. I reckon you should post another question and see if someone can solve it. Until then, maybe this workaround based on cowplot will work for you e.g.
## (I changed 'fill' to 'color' here, but the concept is the same)
library(ggplot2)
library(ggnewscale)
library(cowplot)

# Dummy data: every combination of 20 letters and 20 "varN" labels,
# each given a random value between 0 and 5
x <- LETTERS[seq_len(20)]
y <- paste0("var", seq_len(20))
data <- expand.grid(X = x, Y = y)
data$Z <- runif(n = nrow(data), min = 0, max = 5)

# Main panel: carries both colour scales, with every legend switched off
p1 <- ggplot(data = data, mapping = aes(x = X, y = Y, color = Z)) +
  geom_tile() +
  scale_color_gradient(low = "white", high = "red") +
  new_scale_color() +
  geom_point(mapping = aes(color = Z)) +
  theme(legend.position = "none")

# Donor plot for the first legend (tile layer, white-to-red scale, framed bar)
p2 <- ggplot(data = data, mapping = aes(x = X, y = Y, color = Z)) +
  geom_tile() +
  scale_color_gradient(low = "white", high = "red") +
  guides(
    color = guide_colorbar(frame.colour = "black", frame.linewidth = 1.5)
  )

# Donor plot for the second legend (point layer, default scale, framed bar)
p3 <- ggplot(data = data, mapping = aes(x = X, y = Y, color = Z)) +
  geom_point(mapping = aes(color = Z)) +
  guides(
    color = guide_colorbar(frame.colour = "black", frame.linewidth = 1.5)
  )

# Pull the legend out of each donor plot with cowplot::get_legend()
p2_legend <- get_legend(p2)
p3_legend <- get_legend(p3)

# Stack the two legends in a single column
legends <- plot_grid(p2_legend, p3_legend, ncol = 1, nrow = 2)

# Put the legend column to the right of the main panel
plot_grid(p1, legends, ncol = 2, align = "h", rel_widths = c(0.9, 0.1))
## You may need to tinker with spacing/scale/rel_widths/rel_heights to get it looking right, but it should work out ok with some effort

ggplot - mapping values below and above a threshold in different colour ranges

In the example below, I'd like to colour values above 100 in a blue colour scheme (from light blue for the closest to 100 to dark blue for the max) and values below 100 in a warm colour range (from yellow for the closest to 100 to red for the min). See the example of the colour range below. Could someone kindly help me with that? I have tried a few different ways (incl. the one below) but without success. Thanks a lot!
# Libraries
library(raster)
library(ggplot2)
library(maptools)
# muted() is a scales function; library(ggplot2) does not attach it
library(scales)
data("wrld_simpl")

# Sample raster: cells hold consecutive integers from -ncell/2 + 1 to ncell/2
r <- raster(ncol = 36, nrow = 18)
r[] <- (-ncell(r) / 2 + 1):(ncell(r) / 2)
plot(r)
var_df <- as.data.frame(rasterToPoints(r))

# Plotting: diverging gradient with its white midpoint at layer = 100
p <- ggplot()
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal()
p <- p + scale_fill_gradient2(
  low = muted("red"), mid = "white",
  high = muted("blue"), midpoint = 100
)
p
Hmmm... so, do you actually want it to split exactly at layer=100?
If so,
# Plotting: red -> yellow below 100, skyblue -> darkblue above it.
# The 100 / 100.01 pair of stops makes the colour switch almost immediately
# at the threshold.
p <- ggplot()
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal()
p <- p + scale_fill_gradientn(
  colours = c("red", "yellow", "skyblue", "darkblue"),
  # rescale() is qualified because scales is not attached by library(ggplot2)
  values = scales::rescale(c(
    min(var_df$layer),
    100,
    100.01,
    max(var_df$layer)
  ))
)
One option may be a personal ifelse function to set the colours
#' Map each value in one column of `x` to a named colour band.
#'
#' @param x A data frame (or list) holding the values to classify.
#' @param column Name of the column to classify. Defaults to `"x"`, the
#'   column this function has always read; pass e.g. `"layer"` to classify
#'   another column.
#' @return A character vector with one colour name per value. `NA` values
#'   give `NA`; empty input gives `character(0)`.
colour_func <- function(x, column = "x") {
  # Upper bound of every band except the top one. A value exactly on a bound
  # falls in the band below it, matching the strict `>` tests this replaces.
  upper_bounds <- c(20, 40, 60, 90, 100, 110, 130, 150)
  band_colours <- c(
    "gold", "goldenrod2", "darkorange", "darkorange3", "red",
    "cadetblue", "cadetblue3", "cyan4", "darkblue"
  )
  # findInterval() returns 0 for the lowest band, hence the + 1L
  band <- findInterval(x[[column]], upper_bounds, left.open = TRUE) + 1L
  band_colours[band]
}
# Plotting (console prompts removed so the snippet can be pasted and run)
p <- ggplot()
p <- p + geom_raster(data = var_df, aes(x = x, y = y, fill = layer))
p <- p + coord_equal()
# NOTE(review): this hands one colour per row of var_df to the gradient as its
# colour stops; confirm that is the intended use of colour_func()
p <- p + scale_fill_gradientn(colours = colour_func(var_df))
p

ggplot2 confusion matrix geom_text labeling

I've plotted a confusion matrix (predicting 5 outcomes) in R using ggplot and scales for geom_text labeling.
The way geom_text(aes(label = percent(Freq/sum(Freq)))) is written in the code, it shows the frequency of each box divided by the sum of all observations, but what I want is the frequency of each box divided by the summed frequency for its Reference.
In other words, instead of A,A = 15.8%,
it should be A,A = 15.8%/(0.0%+0.0%+0.0%+0.0%+15.8%) = 100.0%
library(ggplot2)
library(scales)
# confusionMatrix() comes from caret, which has to be attached explicitly
library(caret)

valid_actual <- as.factor(c(
  "A", "B", "B", "C", "C", "C", "E", "E", "D", "D",
  "A", "A", "A", "E", "E", "D", "D", "C", "B"
))
valid_pred <- as.factor(c(
  "A", "B", "C", "C", "E", "C", "E", "E", "D", "B",
  "A", "B", "A", "E", "D", "E", "D", "C", "B"
))
# NOTE(review): caret documents the first argument as the predictions (`data`)
# and the second as the truth (`reference`); the actual labels are passed
# first here - confirm this order is intended
cfm <- confusionMatrix(valid_actual, valid_pred)
# Draw a confusion matrix as a tile plot.
#
# m: an object providing `$table` (the cross-tabulated counts) and `$overall`
#    (first element shown as Accuracy, second as Kappa in the title).
#
# Each tile is filled by log(Freq) and labelled with its share of ALL
# observations (Freq / sum(Freq)). Returns the ggplot object.
ggplotConfusionMatrix <- function(m) {
  fmt_pct <- percent_format()
  mytitle <- paste(
    "Accuracy", fmt_pct(m$overall[1]),
    "Kappa", fmt_pct(m$overall[2])
  )
  counts <- as.data.frame(m$table)

  ggplot(data = counts, aes(x = Reference, y = Prediction)) +
    geom_tile(aes(fill = log(Freq)), colour = "white") +
    scale_fill_gradient(low = "white", high = "green") +
    geom_text(
      aes(x = Reference, y = Prediction, label = percent(Freq / sum(Freq)))
    ) +
    theme(legend.position = "none") +
    ggtitle(mytitle)
}

ggplotConfusionMatrix(cfm)
The problem is that, as far as I know, ggplot is not able to do group calculations. See this recent post for a similar question.
To solve your problem you should take advantage of the dplyr package.
This should work
library(ggplot2)
library(scales)
library(caret)
library(dplyr)

# Actual and predicted labels for the 19 validation cases
valid_actual <- as.factor(c(
  "A", "B", "B", "C", "C", "C", "E", "E", "D", "D",
  "A", "A", "A", "E", "E", "D", "D", "C", "B"
))
valid_pred <- as.factor(c(
  "A", "B", "C", "C", "E", "C", "E", "E", "D", "B",
  "A", "B", "A", "E", "D", "E", "D", "C", "B"
))

# Cross-tabulate the two label vectors with caret
cfm <- confusionMatrix(valid_actual, valid_pred)
# Draw a confusion matrix as a tile plot with per-Reference percentages.
#
# m: an object providing `$table` (the cross-tabulated counts) and `$overall`
#    (first element shown as Accuracy, second as Kappa in the title).
#
# Each tile is filled by log(Freq) and labelled with Freq divided by the total
# Freq of its Reference level, so the labels within one Reference column sum
# to 100%. Returns the ggplot object.
ggplotConfusionMatrix <- function(m) {
  mytitle <- paste(
    "Accuracy", percent_format()(m$overall[1]),
    "Kappa", percent_format()(m$overall[2])
  )

  # ggplot cannot compute per-group shares inside aes(), so do it up front.
  # Ungroup afterwards so the plot data does not carry the grouping along.
  data_c <- as.data.frame(m$table) %>%
    group_by(Reference) %>%
    mutate(percentage = percent(Freq / sum(Freq))) %>%
    ungroup()

  ggplot(data = data_c, aes(x = Reference, y = Prediction)) +
    geom_tile(aes(fill = log(Freq)), colour = "white") +
    scale_fill_gradient(low = "white", high = "green") +
    geom_text(aes(x = Reference, y = Prediction, label = percentage)) +
    theme(legend.position = "none") +
    ggtitle(mytitle)
}

ggplotConfusionMatrix(cfm)
And the result:

Plot geom_scatterpie on a geom_tile plot

I want to plot pie charts using geom_scatterpie on top of a geom_tile plot. However, I am getting an error:
Error: Discrete value supplied to continuous scale
Here's the simple code that I cannot get to work:
library(ggplot2)
library(scatterpie)
# brewer.pal() comes from RColorBrewer, which has to be attached explicitly
library(RColorBrewer)

# Read the NASA table, skipping the 13 lines before the header row
nasafile <- "http://eosweb.larc.nasa.gov/sse/global/text/global_radiation"
nasa <- read.table(file = nasafile, skip = 13, header = TRUE)

# Tile map of `Ann` over Lon/Lat with a continuous yellow-orange-red fill
p <- ggplot(aes(y = Lat, x = Lon), data = nasa) +
  geom_tile(aes(fill = Ann)) +
  scale_fill_gradientn(colours = brewer.pal(n = 9, name = "YlOrRd")) +
  theme_bw() +
  coord_equal()
plot(p)
This works, but if I add the geom_scatterpie on top of that:
First the data for the pie charts to plot:
# Five pies at random positions; A, B and C are the (non-negative) slice sizes
d <- data.frame(x = rnorm(n = 5), y = rnorm(n = 5))
d$A <- abs(rnorm(n = 5, sd = 1))
d$B <- abs(rnorm(n = 5, sd = 2))
d$C <- abs(rnorm(n = 5, sd = 3))
But I get the error when I do this:
# Adds the pies from `d` (slices A, B, C) on top of the tile plot `p`.
# According to the surrounding text, this is the call that raises the error.
p + geom_scatterpie(aes(x=x, y=y), data=d, cols=c("A", "B", "C")) + coord_fixed()
The problem is that your geom_tile uses a continuous fill scale while geom_scatterpie uses a discrete fill scale. It works if you change Ann to a factor. Not ideal, but this works:
# Turn Ann into a factor so the tiles use a discrete fill scale as well
nasa$Ann <- as.factor(as.integer(nasa$Ann))
# brewer.pal() is qualified because RColorBrewer is not attached by the
# libraries loaded for this example
mypalette <- RColorBrewer::brewer.pal(9, "YlOrRd") # 6 for geom_tile, 3 for geom_scatterpie

p <- ggplot(aes(y = Lat, x = Lon), data = nasa) +
  geom_tile(aes(fill = Ann)) +
  scale_fill_manual(values = mypalette) +
  theme_bw() +
  coord_equal()
p

d <- data.frame(x = rnorm(5, 0, 50), y = rnorm(5, 0, 30)) # larger sd
d$A <- abs(rnorm(5, sd = 1))
d$B <- abs(rnorm(5, sd = 2))
d$C <- abs(rnorm(5, sd = 3))
p + geom_scatterpie(aes(x = x, y = y, r = 20), data = d, cols = c("A", "B", "C")) # larger radius
Or, using size= instead of fill= (and no geom_scatterpie):
# NOTE(review): the continuous gradient needs a numeric `Ann`; presumably this
# is meant to run on the table as originally read, not after Ann has been
# converted to a factor - confirm
p <- ggplot(aes(y = Lat, x = Lon), data = nasa) +
  geom_tile(aes(fill = Ann)) +
  # brewer.pal() is qualified because RColorBrewer is not attached by the
  # libraries loaded for this example
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(n = 9, name = "YlOrRd")) +
  theme_bw() +
  coord_equal()
p

# Three sample locations; `genvar` drives the point size instead of a fill
d <- data.frame(
  Lon = c(-100, 0, 100),
  Lat = c(-50, 0, 50),
  genvar = c(.1, .3, .5)
)
p + geom_point(
  data = d, aes(x = Lon, y = Lat, size = genvar),
  color = "white"
)

Add a specific value of x-axis on ggplot

I am using the ggplot function to plot this kind of graph
image
I want to add the specific value of the x-axis as shown in the picture
this is my code :
# `mat` and `quant` are defined outside this snippet.
# The argument is spelled `probs` in full rather than relying on partial
# matching of `prob`.
quantiles <- quantile(mat, probs = quant)
x <- as.vector(mat)
d <- data.frame(x = x)

# Histogram and kernel density of the flattened values, both on the density scale
p <- ggplot(data = d, aes(x = x)) +
  theme_bw() +
  geom_histogram(aes(y = ..density..), binwidth = 0.001, color = "black", fill = "white") +
  geom_density(aes(x = x, y = ..density..), fill = "blue", alpha = 0.5, color = "black")

# Shade the part of the density curve at or below the quantile
x.dens <- density(x)
df.dens <- data.frame(x = x.dens$x, y = x.dens$y)
# NOTE(review): `x <= quantiles` recycles `quantiles` if it has more than one
# element - confirm `quant` is a single probability
p <- p + geom_area(
  data = subset(df.dens, x <= quantiles), aes(x = x, y = y),
  fill = "green", alpha = 0.6
)
print(p)

Resources