I would like to make a contour plot with ggplot2 by using gam results. Below is a detailed explanation of what I want:
#packages
library(mgcv)
library(ggplot2)
library(tidyr)
# Prepare data: expose three iris measurements under the generic names x, y, z.
df <- with(
  iris,
  data.frame(x = Sepal.Width, y = Sepal.Length, z = Petal.Length)
)

# Fit an additive GAM with one smooth per covariate. na.action = "na.fail"
# makes the fit stop with an error if the data contain missing values.
gam_fit <- gam(z ~ s(x) + s(y), data = df, na.action = "na.fail")
To predict z values based on the gam_fit, I found a way from https://drmowinckels.io/blog/2019-11-16-plotting-gamm-interactions-with-ggplot2/
# Predict z values on a regular 100 x 100 grid spanning the observed x and y
# ranges.
df_pred <- expand_grid(
  x = seq(min(df$x), max(df$x), length.out = 100),
  y = seq(min(df$y), max(df$y), length.out = 100)
)

# With se.fit = TRUE, predict() returns both `fit` and `se.fit`; turn them into
# columns and attach the grid coordinates alongside.
df_pred <- cbind(
  as_tibble(predict(gam_fit, newdata = df_pred, se.fit = TRUE)),
  df_pred
)

# Layers, bottom to top: filled tiles of the fitted surface, the observed
# points, and white contour lines of the same surface.
gg <- ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  geom_point(data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white")
print(gg)
This gives me the plot below:
My goal is to remove the tiles and contours wherever there are no measured x-y points. For example, there are no measured points around the top-right and top-left corners of the plot.
I wonder if mgcViz can achieve this, but it requires including x & y as an interaction term as below (also I am not sure how to add measured points on the below figure):
library(mgcViz)
# Refit with a single bivariate smooth of x and y. A `REML=TRUE` argument was
# left commented out in the original call and is still not passed here.
gamm_fit2 <- gam(z ~ s(x, y), data = df, na.action = "na.fail")

# Wrap the fit with mgcViz, then plot the smooth at index 1.
b <- getViz(gamm_fit2)
plot(sm(b, 1))
I think df_pred may not be the best format to achieve my goal, but I am not sure how to do this. I would be grateful for any solution using ggplot2.
There might be a package designed to handle this task, but if you can't find the right 'tool' for the job one option is to draw a polygon around the 'points' and colour everything outside the polygon grey, e.g.
library(tidyverse)
library(mgcv)
# Prepare data: iris sepal width / length as x / y, petal length as z.
df <- with(
  iris,
  data.frame(x = Sepal.Width, y = Sepal.Length, z = Petal.Length)
)

# Fit the additive GAM; stop with an error on missing values.
gam_fit <- gam(z ~ s(x) + s(y), data = df, na.action = "na.fail")

# Regular 100 x 100 prediction grid over the observed covariate ranges.
df_pred <- expand_grid(
  x = seq(min(df$x), max(df$x), length.out = 100),
  y = seq(min(df$y), max(df$y), length.out = 100)
)

# Attach the predictions (fit and se.fit) to the grid coordinates.
df_pred <- cbind(
  as_tibble(predict(gam_fit, newdata = df_pred, se.fit = TRUE)),
  df_pred
)

# Reference plot of the full fitted surface, zoomed to a fixed frame.
ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  geom_point(data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white") +
  coord_cartesian(xlim = c(1.9, 4.5), ylim = c(4, 8))
# Get the convex hull around all of the observed points.
hulls <- df[chull(df$x, df$y), ]

# Limits of the rectangular frame that the mask must fill. They are named
# once here instead of being repeated as bare numbers in the vertex lists.
frame_xlim <- c(1.9, 4.5)
frame_ylim <- c(4, 8)

# Get the 'edges' of the frame, starting at the first hull point. That point
# is read from `hulls` rather than typed in by hand, so the polygon still
# closes on the hull if the data (and therefore the hull) change. The frame
# is then walked corner by corner and closed on its starting corner.
edges <- data.frame(
  x = c(hulls$x[1], frame_xlim[c(2, 2, 1, 1, 2)]),
  y = c(hulls$y[1], frame_ylim[c(1, 2, 2, 1, 1)]),
  z = NA
)

# Combine: hull outline followed by the frame outline, so that the filled
# polygon covers the frame everywhere except inside the hull.
draw_poly <- rbind(hulls, edges)

# Draw the plot, and overlay the grey polygon
ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  geom_point(data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white") +
  geom_polygon(data = draw_poly, fill = "grey")

# Without the points (the geom_point() layer is left out)
ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white") +
  geom_polygon(data = draw_poly, fill = "grey")
Created on 2022-09-16 by the reprex package (v2.0.1)
Here's another example using the concaveman package to calculate the concave hull:
library(ggforce)
#install.packages("concaveman")
library(concaveman)
# Concave hull of the observed (x, y) points. Its coordinates are addressed
# as V1 / V2 below, so the frame corners use the same column names.
border <- concaveman(as.matrix(df[, c("x", "y")]), concavity = 2)

# Corners of the rectangular frame, closed on the starting corner.
edges <- data.frame(
  V1 = c(4.5, 4.5, 1.9, 1.9, 4.5),
  V2 = c(4, 8, 8, 4, 4)
)

# Hull outline followed by the frame outline.
draw_poly <- rbind(border, edges)

# With the observed points; the negative `expand` is passed through to
# geom_shape() exactly as in the original.
ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  geom_point(data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white") +
  geom_shape(
    data = draw_poly, aes(x = V1, y = V2),
    fill = "grey", expand = unit(-0.05, "cm")
  )

# Same plot without the observed points.
ggplot(df_pred, aes(x = x, y = y)) +
  geom_tile(aes(fill = fit)) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fit), colour = "white") +
  geom_shape(
    data = draw_poly, aes(x = V1, y = V2),
    fill = "grey", expand = unit(-0.05, "cm")
  )
Created on 2022-09-16 by the reprex package (v2.0.1)
To get something more akin to how mgcv::plot.gam() and mgcViz produce their plots for something like this, you need to identify pairs of covariates that lie too far from the support of your data. The reason we might prefer this over, say, clipping the predictions to the convex hull of the observations is that some mild extrapolation beyond the data is probably not too much of a violation of the fact that splines have penalties that apply over the range of the data only. From a more pragmatic view, and this is something shown in Anderson's iris data used in the example, there are regions of the covariate space where we would have to interpolate that lie as far from the support of the data as, if not further than, points we might extrapolate to.
mgcv has a function for doing this called exclude.too.far(), so if you want total control you can do the following, reusing code from @jared_mamrot's excellent answer (modified a little):
library("dplyr")
library("tidyr")
library("ggplot2")
library("mgcv")
# Prepare data
df <- data.frame(
  x = iris$Sepal.Width,
  y = iris$Sepal.Length,
  z = iris$Petal.Length
)

# Fit the additive GAM, selecting smoothness with method = "REML"
gam_fit <- gam(z ~ s(x) + s(y), data = df, method = "REML")

# Regular 100 x 100 grid over the observed ranges of x and y
df_new <- expand_grid(
  x = seq(min(df$x), max(df$x), length.out = 100),
  y = seq(min(df$y), max(df$y), length.out = 100)
)

# Predict on the grid and store the result as `fitted` next to the grid
# coordinates
df_pred <- bind_cols(
  tibble(fitted = predict(gam_fit, newdata = df_new)),
  df_new
)
Now we can find out which of the rows in the grid we're predicting at represent covariate pairs that are too far from the support of the original data. What exclude.too.far() does is transform the pairs of covariates in the prediction grid to a unit square, with [0,0] representing the coordinate (min(x), min(y)), and [1,1] the coordinate (max(x), max(y)). It then transforms the original covariate data onto this unit square as well. Finally, it computes the Euclidean distance between each point in the grid (on the unit square) and each row in the observed data (projected onto the unit square).
Any node in the prediction grid that lies more than dist from every observation is then flagged for exclusion as lying too far from the support of the data. dist is the argument that controls what we mean by "too far". dist is specified in terms of the unit square, so the maximum distance between any two points on the unit square is
r$> dist(data.frame(x = c(0,1), y = c(0,1)))
1
2 1.414214
The default in plot.gam and, IIRC, in mgcViz is dist = 0.1. If we do this for our example:
# Flag grid nodes that lie further than `dist` from the observed (x, y) pairs.
drop <- exclude.too.far(
  g1 = df_pred$x, g2 = df_pred$y,
  d1 = df$x, d2 = df$y,
  dist = 0.1
)
drop is now a logical vector of length nrow(df_pred), with TRUE indicating we should exclude the observation pair.
Using drop we can set fitted to NA for the points we want to exclude:
# Blank out the fitted value wherever `drop` is TRUE.
df_pred <- mutate(df_pred, fitted = if_else(drop, NA_real_, fitted))
Now we can plot:
# Tiles and contours of the fitted surface with the observed points on top.
# fill = NULL stops the point and contour layers inheriting the tile fill.
ggplot(df_pred, aes(x = x, y = y, fill = fitted)) +
  geom_tile() +
  geom_point(aes(fill = NULL), data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fitted, fill = NULL), colour = "white")
producing
You can do this a bit more easily using my gratia package (IMHO), but the general idea is the same
# remotes::install_github("gavinsimpson/gratia") # need's dev version
library("gratia")
# Prepare data
df <- data.frame(
  x = iris$Sepal.Width,
  y = iris$Sepal.Length,
  z = iris$Petal.Length
)

# Fit model
gam_fit <- gam(z ~ s(x) + s(y), data = df, method = "REML")

# Prepare a data slice through the covariate space: 100 values of each
# covariate
ds <- data_slice(
  gam_fit,
  x = evenly(x, n = 100),
  y = evenly(y, n = 100)
)

# Predict at every row of the slice
fv <- fitted_values(gam_fit, data = ds)

# Flag the points that are too far from the observed data
drop <- too_far(ds$x, ds$y, df$x, df$y, dist = 0.1)

# NOTE(review): this assumes fitted_values() returns a column named `fitted`;
# newer gratia releases may call it `.fitted` - confirm against the installed
# version.
fv <- mutate(fv, fitted = if_else(drop, NA_real_, fitted))

# Then plot
ggplot(fv, aes(x = x, y = y, fill = fitted)) +
  geom_tile() +
  geom_point(aes(fill = NULL), data = df) +
  scale_fill_distiller(palette = "YlGnBu") +
  geom_contour(aes(z = fitted, fill = NULL), colour = "white")
I have raster data and want to make a contour plot similar to the one in this question (enter link description here). I got the code from there, but I want to highlight (colour) only the
regions that are above the 75th percentile and draw the rest with simple contour lines, as shown in the picture below. I copied the code from the link above.
enter image description here
The code is as follows:
library(raster)
library(sf)
library(sp)
# Build a raster from the built-in volcano matrix: reverse the column order,
# shift by 94, scale by 4.95, then transpose.
r <- raster(t((volcano[, rev(seq_len(ncol(volcano)))] - 94) * 4.95))
Let's mock with shapefile
# Bounding box of the raster as a simple-features geometry.
poly <- r |>
  rasterToPolygons() |>
  st_as_sf() |>
  st_bbox() |>
  st_as_sfc()

# Four reproducible random points inside the box, buffered into polygons.
# NOTE(review): three buffer distances are given for four points - confirm
# that the implied recycling is intended.
set.seed(3456)
sample <- st_buffer(st_sample(poly, 4), c(0.01, 0.02, 0.03))

# Add an id column and write the mock shapefile, replacing any existing file.
sample <- st_sf(x = 1:4, sample)
st_write(sample, "1aa.shp", append = FALSE)
library(raster)
library(sf)
# Rebuild the raster from the built-in volcano matrix: reverse the column
# order, shift by 94, scale by 4.95, then transpose.
r <- raster(t((volcano[, rev(seq_len(ncol(volcano)))] - 94) * 4.95))
Use sf!!
# Load the shapefile
pg <- st_read("1aa.shp")

# Quick base-graphics check: raster with the polygons drawn over it.
plot(r)
plot(st_geometry(pg), add = TRUE)

# Now work with geom_sf() on the pg object.
# Value of the raster at its 90th percentile, used as a single contour level.
centile90 <- quantile(r, 0.90)

# Raster to long data frame with columns value, x, y.
df <- as.data.frame(as(r, "SpatialPixelsDataFrame"))
names(df) <- c("value", "x", "y")

library(ggplot2)

# Filled bands every 50 units from 0 to 500.
mybreaks <- seq(0, 500, 50)

ggplot(df, aes(x = x, y = y, z = value)) +
  geom_contour_filled(breaks = mybreaks) +
  geom_contour(breaks = centile90, colour = "pink", size = 0.5) +
  geom_sf(data = pg, fill = "black", inherit.aes = FALSE) +
  scale_fill_manual(
    values = hcl.colors(length(mybreaks) - 1, "Zissou1", rev = FALSE)
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  theme()
I just want to adapt this code so that only the region above the 75th percentile is highlighted.
You can set the breaks of geom_contour_filled to start at your 75th centile, and make the NA value of scale_fill_manual transparent. You also need to draw in the default contour lines:
# 75th percentile of the raster values: the lower bound of the highlighted
# (filled) region.
centile75 <- quantile(r, 0.75)

ggplot(df, aes(x, y, z = value)) +
  # Plain contour lines at the default levels, drawn across the whole raster.
  geom_contour(color = "gray") +
  # Fill only from the 75th percentile up to the maximum: five break points
  # give four bands. The breaks start at `centile75`, the variable defined
  # above.
  geom_contour_filled(
    breaks = seq(centile75, max(df$value), length.out = 5)
  ) +
  geom_sf(data = pg, fill = "black", inherit.aes = FALSE) +
  # One colour per band; NA is mapped to a fully transparent colour so that
  # the gray contour lines stay visible.
  scale_fill_manual(
    values = hcl.colors(4, "Zissou1"),
    na.value = "#00000000"
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  theme()
How do I color outliers that are above a specific value using ggplot2 in R?.
(Sorry for the seemingly easy question, I am a beginner.) The reason for the outliers is that some frequencies have a value of 0, and I am transforming this column of data by taking -log10(), so anything that has a frequency of 0 is transformed into Inf. Attached is a screenshot of my plot; essentially, I want to make all the outlier points above 10 on the y axis a different color.
#' Box plots of -log10(frequency) by Type
#'
#' Replaces missing frequencies with 0, computes -log10(frequency), gathers
#' the values for the three Type groups and draws one box per group with the
#' individual points jittered on top. The plot is printed and saved to
#' "frequency_by_type.png".
#'
#' Side effects kept for existing callers: the plotting data are assigned
#' with `<<-` to `plot.data` and the plot object to `s` in the enclosing
#' (normally global) environment.
#'
#' @param df Data frame with a numeric `frequency` column and a `Type` column.
#' @return The value of the final `ggsave()` call.
boxplots <- function(df){
  # Missing frequencies count as 0. -log10(0) is Inf, which lies outside the
  # 0-10 y limits set on the plot below.
  df[["frequency"]][is.na(df[["frequency"]])] <- 0.00
  df[["-log10(frequency)"]] <- -log10(df[["frequency"]])

  # Values of one Type, labelled with the group name used on the plot.
  values_for <- function(group, type) {
    data.frame(
      group = group,
      value = df[["-log10(frequency)"]][df[["Type"]] == type]
    )
  }

  # NOTE(review): group "z" is selected with Type == 'c=z', unlike the plain
  # 'x' and 'y' used for the other groups - confirm that this is the real
  # Type value and not a typo.
  plot_data <- rbind(
    values_for("x", "x"),
    values_for("y", "y"),
    values_for("z", "c=z")
  )
  plot.data <<- plot_data

  # Axis labels in the order the groups are plotted (x, y, z). The previous
  # vector was c("z", "y", "z"), which labelled group x as "z".
  labels <- c("x", "y", "z")

  # The plot is held in `p` rather than `t`, so base::t() is not shadowed.
  p <- ggplot(plot_data, aes(x = group, y = value, fill = group)) +
    geom_boxplot() +
    scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
    geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
    theme_ipsum() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11)
    ) +
    ggtitle("Distribution of -log10(frequency) by Type") +
    xlab("Type") +
    ylab("-log10(frequency)") +
    scale_x_discrete(labels = labels) +
    scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2))

  print(p)
  s <<- p
  ggsave("frequency_by_type.png", plot = p)
}
You could just create a new column indicating whether a point is an outlier or not and map this to the geom_jitter color. I have condensed the answer into a smaller example, but you should be able to adapt it accordingly:
library(ggplot2)
library(viridis)
# Small example data: two groups with the same five values each.
plot.data <- data.frame(
  group = rep(c("1", "2"), each = 5),
  value = c(1, 5, 10, 6, 3, 1, 5, 10, 6, 3)
)

# Flag values above 9 as outliers and map the flag to the jitter colour.
# The flag is added with base transform(), so the example runs with only
# ggplot2 and viridis attached (no %>% or mutate() needed).
t <- ggplot(
  transform(plot.data, outlier = ifelse(value > 9, "YES", "NO")),
  aes(x = group, y = value, fill = group)
) +
  geom_boxplot() +
  geom_jitter(aes(color = outlier), size = 2, alpha = 0.9) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6)
t
library(ggplot2)
# dose is stored as a number in ToothGrowth; convert it to a factor so that
# each dose level gets its own box instead of one box for all the data.
ToothGrowth$dose <- as.factor(ToothGrowth$dose)

# Basic box plot
p <- ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot()
p

# Rotate the box plot
p + coord_flip()

# Notched box plot
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot(notch = TRUE)

# Change outlier, color, shape and size
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot(
    outlier.colour = "red",
    outlier.shape = 8,
    outlier.size = 4
  )
I would like to change the spread display of the legend bar as on the jpeg below. In example 1, I'd like to display the "100" threshold value in the middle of the legend bar. In example 2, I'd like to display the mean value (white colour) in the middle of the legend bar. I suspect both would require similar line of code. Could someone very kindly help me with this?
#library
library(raster)
library(ggplot2)
# Sample raster: 36 columns x 18 rows, filled with consecutive integers
# centred on zero, then the first row overwritten with 5000.
r <- raster(ncol = 36, nrow = 18)
r[] <- seq(-ncell(r) / 2 + 1, ncell(r) / 2)
r[1, ] <- 5000
plot(r)

# Cell coordinates and values as a data frame for ggplot2.
var_df <- as.data.frame(rasterToPoints(r))
### example 1
# Four-colour gradient with a sharp change at 100: the colour stops sit at
# the minimum, 100, 100.01 and the maximum, rescaled to [0, 1].
# rescale() lives in the scales package, which is not attached here, so it is
# called with an explicit scales:: prefix.
p <- ggplot() +
  geom_raster(data = var_df, aes(x = x, y = y, fill = layer)) +
  coord_equal() +
  scale_fill_gradientn(
    colours = c("red", "yellow", "skyblue", "darkblue"),
    values = scales::rescale(
      c(min(var_df$layer), 100, 100.01, max(var_df$layer))
    )
  )
p

### example 2
# Diverging gradient with white at the mean value. muted() also comes from
# scales and is namespaced for the same reason.
meanval <- mean(var_df$layer)
p <- ggplot() +
  geom_raster(data = var_df, aes(x = x, y = y, fill = layer)) +
  coord_equal() +
  scale_fill_gradient2(
    low = scales::muted("red"),
    mid = "white",
    high = scales::muted("blue"),
    midpoint = meanval
  )
p