Specify number of bins when passing an sf object to ggmap in R

I have a dataframe object, created by reading in a shapefile with sf::read_sf and merged with some pre-existing data on a common geography column:
boundaries <- sf::read_sf('./shapefile')
map <- merge(boundaries, data, by.x = "InterZone",
             by.y = "IntermediateZone2011Code", all.x = FALSE, duplicateGeoms = TRUE)
This is then overlaid with ggmap on top of a provider tile obtained with ggmap's get_map function:
myMap <- get_map(location = c(lon = -2.27, lat = 57.1), zoom = 6,
                 maptype = "toner", crop = FALSE, source = 'stamen')
ggmap(myMap) +
  geom_sf(data = map, aes(fill = as.factor(column1)), inherit.aes = FALSE) +
  scale_fill_brewer(palette = "OrRd") +
  coord_sf(crs = st_crs(4326)) +
  labs(x = 'Longitude', y = 'Latitude', fill = 'column1') +
  ggtitle('column1')
The issue is that this automatically creates hundreds of bins.
I have been looking through the documentation but cannot find an argument to specify the number of bins. How can I break the column down into a fixed number of bins and then map those?

Without a reproducible example it is hard to say exactly what is going on, but it looks like you might be converting a continuous variable into a factor with fill=as.factor(column1).
One option is to remove as.factor and use scale_fill_continuous or some other continuous color scale of your choice.
Another option is to look into cut, which bins continuous data by a specified number of bins or by specific break points.
# Make n bins
map$data_bin <- cut(map$column1, breaks = n)
# Or make specific start and end points for bins
map$data_bin <- cut(map$column1, breaks = c(-Inf, 50, 100, Inf))
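For instance, a minimal sketch of how the binned column could then drive the fill aesthetic, reusing myMap and map from the question (data_bin is whichever of the two versions above you created):
library(ggmap)
library(sf)

ggmap(myMap) +
  geom_sf(data = map, aes(fill = data_bin), inherit.aes = FALSE) +
  scale_fill_brewer(palette = "OrRd", name = 'column1') +  # brewer palettes handle up to 9 discrete bins
  coord_sf(crs = st_crs(4326)) +
  labs(x = 'Longitude', y = 'Latitude')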

Related

Trying to map petition data in R

I am trying to map UK government petition data in R. I used boundary data from the ONS Open Geography Portal. The code runs and the first map I created works.
#Install packages
install.packages("tidyverse")
install.packages("jsonlite")
install.packages("geojsonio")
install.packages("sp")
install.packages("parlitools")
install.packages("rvest")
install.packages("xml2")
install.packages("magrittr")
#Load packages
library(tidyverse)
library(jsonlite)
library(geojsonio)
library(sp)
library(parlitools)
library(rvest)
library(xml2)
library(magrittr)
#GETTING PETITION DATA
#Importing petition for UK-wide lockdown from JSON format
petition <- fromJSON("https://petition.parliament.uk/petitions/301397.json", flatten = TRUE)
signatures <- petition$data$attributes$signatures_by_constituency %>%
rename(constituency = name)
#MAPPING BOUNDARIES
#Save url for boundary data UK
url <- "https://opendata.arcgis.com/datasets/b64677a2afc3466f80d3d683b71c3468_0.geojson"
#Load and save the boundary data as uk_map
uk_map <- geojson_read (url, what = "sp")
#pcon18cd is code name for constituency (as we can see when we view uk_map). Use fortify to get this data.
fort_uk_map <- fortify(uk_map, region = "pcon18cd")
#MAPPING PETITION DATA
#Join map data to signatures data from constituency using left_join
full_uk_map <- left_join(fort_uk_map, signatures, by = c("id" = "ons_code"))
#Plot-1a: Map of signatures in the whole of UK
ggplot() +
geom_polygon(data = full_uk_map, aes(x = long, y= lat, group = group, fill = signature_count)) +
geom_path(color = "black", size = 0.1) + theme(legend.position = "bottom") +
theme_void() +
labs(x = NULL,
y = NULL,
title = "Signatories of the UK Coronavirus Lockdown Petition",
subtitle = "Let's investigate where the signatures come from",
caption = "Geometries: ONS Open Geography Portal; Data: UK Parliament and Government",
fill = "Signature Count")
But, as you can see from the image, higher signature counts have a lighter color. I would like to change it so that higher signature counts have a darker color.
So I added the code below the code above, and that is where I am facing issues.
#Change color of legend so that higher signature count equals darker color. Use quantile () [Doesn't work]
no_of_classes <- 9
quantiles <- quantile(full_uk_map$signature_count, probs = seq(0, 1, length.out = no_of_classes + 1))
labels <- c()
for(band in 1:length(quantiles)){
labels <- c(labels, paste0(round(quantiles[band])," - ", round(quantiles[band + 1])))
}
full_uk_map$quantiles <- cut(full_uk_map$signature_count, breaks = quantiles, labels = labels,
include.lowest = T)
labels <- labels[1:length(labels)-1]
#Plot-1b: Map of signatures in the whole of UK [Doesn't work]
sig_map_by_quantile <- ggplot() +
geom_polygon(data = full_uk_map, aes(x = long, y = lat, group = group, fill = quantiles)) +
geom_path(color = "black", size = 0.1) +
scale_fill_brewer(type = 'qual', palette = "Blues", guide = "legend", name = "Signature Count", labels = labels) +
theme_void +
theme(legend.position = "bottom") +
labs(x = NULL,
y = NULL,
title = "Signatories of the UK Coronavirus Lockdown Petition",
caption = "Geometries: ONS Open Geography Portal; Data: UK Parliament and Government")
When I run the full_uk_map$quantiles assignment, this is the error message I see:
> full_uk_map$quantiles <- cut(full_uk_map$signature_count, breaks = quantiles, labels = labels,
+ include.lowest = T)
Error in cut.default(full_uk_map$signature_count, breaks = quantiles, :
lengths of 'breaks' and 'labels' differ
Would anyone be able to help? Much appreciated!
Why you made us go through all that package installation, downloading files from the Internet, fortification, merging, and then waiting for the plot to appear is beyond me.
All you had to ask was why the cut function was returning an error. Your title is totally irrelevant to the problem.
Anyway, when breaks is supplied as a vector of cut-points, the cut function requires the length of labels to be one less than the length of breaks, i.e. one label per interval. This is easy to miss in the documentation for the breaks and labels arguments. Note that breaks can also be given as a single number of intervals rather than a vector of cut-points.
For example, if breaks = c(1,2,3), then that implies you have two intervals, so you need 2 labels.
In your code, you supply the quantiles vector as breaks and the labels vector as labels. Both have the same length, which triggers the error; you have one label too many. (Your loop also runs one step past the end of quantiles, so its last label ends in NA, and the line that trims labels comes after the cut call, too late to help.) Solution: make labels one element shorter than breaks.
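A minimal sketch of the corrected binning, reusing the objects from the question (the vectorised label construction below replaces the loop; na.rm = TRUE is an added precaution in case the join left missing counts):
no_of_classes <- 9
quantiles <- quantile(full_uk_map$signature_count,
                      probs = seq(0, 1, length.out = no_of_classes + 1),
                      na.rm = TRUE)
# One label per interval: length(quantiles) - 1 labels, no trailing NA
labels <- paste0(round(quantiles[-length(quantiles)]), " - ", round(quantiles[-1]))
full_uk_map$quantiles <- cut(full_uk_map$signature_count,
                             breaks = quantiles, labels = labels,
                             include.lowest = TRUE)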

spatial network plot using ggplot

I would like to reproduce a plot of the spatial dependency of regions in ggplot2 rather than using base plot in R.
I provide a reproducible example in the code below.
I followed this example: Plotting neighborhoods network to a ggplot maps
library(leaflet)
library(ggplot2)
library(sf)
library(spdep)
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_CZE_1_sp.rds"
data <- readRDS(url(URL))
ggplot() +
geom_polygon(data = data, aes(x=long, y = lat, group = group), color = "black", fill = F)
cns <- knearneigh(coordinates(data), k = 3, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cns
scnsn
cS <- nb2listw(scnsn)
summary(cS)
# Plot of regions and k-nn neighbourhood matrix
plot(data)
plot(cS, coordinates(data), add = T)
I am asking how to reproduce the plot of regions and the k-nn neighbourhood matrix using ggplot.
I know we have to retrieve each point and then use geom_segment; however, I don't know how to retrieve the segment endpoints from the cS object.
The other SO post you are referring to contains all the steps you need to follow to get your plot (thanks to the great answer from @StupidWolf).
Basically, you need to extract the different segments as follows:
1) Transform the coordinates of data into a data frame; this will make them easier to use later:
data_df <- data.frame(coordinates(data))
colnames(data_df) <- c("long", "lat")
This data_df now contains all the x, y values for plotting the points.
2) Now, we can retrieve the segment information from the cS object:
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
  from = rep(1:n, sapply(cS$neighbours, length)),
  to = unlist(cS$neighbours),
  weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
The DA data frame now has all the information required to draw each segment.
3) Finally, you can plot all the parts together:
ggplot(data, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group), color = "black", fill = FALSE) +
  geom_point(data = data_df, aes(x = long, y = lat), size = 1) +
  geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size = 0.5)
Again, the solution provided by @StupidWolf was pretty well written and understandable, so I don't know why you were not able to reproduce it.

How to use different (30) colors for multiple blocks (ranges) of values in a ggplot2 map in R?

I would like to use different colors for specific ranges of values I have in grid cells on a map (Southern Hemisphere), when working with this dataset. I'm not sure if I have defined the blocks correctly, and I don't know how to ask for a different color for each block where I have at least one occurrence.
I'm using ggplot2 in R to create a map with grid cells containing different numbers of individuals. I get a nice plot, but it shows only a few colors/shades (because I have a few grid cells with very high values). So I divided the range of individuals sighted per cell (n, which varies from 1 to 15035) into blocks (by = 100), intending to ask R to use a different color/shade depending on the block each cell falls in (e.g. one color for cells with 1-100 individuals, another color for cells with 101-200 individuals, and so on). I know that I have many (151) blocks, but only 30 of them have at least one occurrence (most ranges contain no cells). There is a mistake in my blocks (breaks in the code provided, as there is some overlap) and I don't know how to include this information when creating the plot to ask for 30 different colors for the blocks whose frequency is non-zero. I tried some options using the ggplot arguments I kept in the code provided here (the lines creating an object 'sc'). How should I specify my blocks (breaks) to avoid the overlap I'm getting? Is the limits argument right in the code? How do I ask for different colors for the blocks where I have at least one individual (n > 0)?
Any tip will be very appreciated :)
##Setting workspace
#setwd...
rm(list=ls()) #removing previous objects
#Installing (or loading) necessary packages
packages = c('ggplot2','sp','rgdal','sf','readxl','maps','dplyr')
package.check = lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
})
#### Data ###
hump = read_excel("data_r.xlsx")
range(hump$Lat_Dec)
range(hump$Lon_Dec)
#Convert df to sf -----------------------------
hbk_sf = st_as_sf(x = hump,
coords = c("Lon_Dec", "Lat_Dec"),
crs = "+init=epsg:4326")
class(hbk_sf)
#Plot
plot(st_geometry(hbk_sf))
#Plot all variables (I don't recommend, it will take some time)
#plot(hbk_sf)
#Create grid--------------
grd = st_make_grid(hbk_sf, cellsize = 10, square = T)
grd = as_Spatial(grd)
grd = st_as_sf(grd)
grd$id = rownames(grd)
class(grd)
#plot(st_geometry(grd),add=T)
#Count individuals at grid------------------
#Spatial join: Add the grid id at hbk dataframe
hbk = st_join(hbk_sf, grd, join = st_within)
range(hbk$id,na.rm=T)
hbk_count = count(as_tibble(hbk), id)
#alternatively: hbk_count = aggregate(hbk$Hbk, by = list(hbk$id), FUN = sum)
hbk_count
#Adding the count in grids
grd_hbk = left_join(grd,hbk_count)
plot(grd_hbk)
range(grd_hbk$n,na.rm=T)
ggplot(grd_hbk,aes(x=n))+geom_density()
#Plotting with ggplot
world = sf::st_as_sf(map('world', plot = FALSE, fill = TRUE))
mymap = ggplot(grd_hbk[!is.na(grd_hbk$n),],aes(fill=n))+
geom_sf(data=world,aes(),fill='grey',lwd=.2)+
geom_sf(alpha=.7,lwd=0)+
scale_fill_distiller(palette = "Spectral")+
coord_sf(crs = st_crs(4326),xlim = c(-165,165), ylim = c(-70,6))+
theme_bw() # + theme(legend.position = 'none')
mymap
mymap = mymap + ggtitle("...")+
theme(plot.title = element_text(color = "black", size = 10, hjust = 0.5))
mymap
ggsave("mymap.png", dpi=300)
#-------
#To give colours to the grids considering the range of catches in each one
#that has at least one catch (grd_hbk>0)
#Tried to create breaks to then apply to grd_hbk$n. I want R to
#use a different colour/shade depending on the range of grd_hbk$n in
#each grid (one colour for grids with 1-100 catches, another colour for
#grids with 101-200 and so on)---
# Generate breaks to cut the data
breaks = seq(0, 15100, by = 100)
# Cut the data and save the result in an object
r = cut(grd_hbk$n, breaks) #this has an overlap (Levels: (0,100] (100,200]
#(200,300] ...)
# Check the number of different categories
length(levels(r))
# Name for the levels
levels(r) = as.character(1:152)
#table(levels(r))
table(r)
#create a combination of colours to use in the following plot
#sc = scale_colour_gradientn(colors = 'red', 'blue', 'green')
#sc = scale_fill_grey(start=1, end=15035, aes(fill=y))
#sc = scale_fill_grey(start=min(levels(r)), end=max(levels(r)),
#aes(fill=y))
sc = scale_fill_gradient(low="blue", high="red")
mymapii = ggplot(grd_hbk[!is.na(grd_hbk$n),],aes(fill=n))+
geom_sf(data=world,aes(),fill='grey',lwd=.2)+
geom_sf(alpha=.7,lwd=0)+
scale_colour_manual(limits = min(levels(r)),max(levels(r)),
values = sc, #colors to be used
breaks = breaks,
aes(fill=sc))+ #maybe need another specification here?
coord_sf(crs = st_crs(4326),xlim = c(-165,165), ylim = c(-70,6))+
theme_bw()
mymapii
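For what it's worth, a minimal sketch of one way to get a discrete colour per occupied block, reusing grd_hbk and world from the code above (this is not a verified answer; n_bin is a new column name and the viridis palette is an arbitrary choice):
library(ggplot2)
library(sf)

# Bin n into 100-wide intervals, then drop the empty levels so only
# the occupied bins (about 30 of them) get a colour
grd_hbk$n_bin <- cut(grd_hbk$n, breaks = seq(0, 15100, by = 100))
grd_hbk$n_bin <- droplevels(grd_hbk$n_bin)

mymap_binned = ggplot(grd_hbk[!is.na(grd_hbk$n),], aes(fill = n_bin)) +
  geom_sf(data = world, aes(), fill = 'grey', lwd = .2) +
  geom_sf(alpha = .7, lwd = 0) +
  scale_fill_viridis_d(name = "Individuals per grid cell") +
  coord_sf(crs = st_crs(4326), xlim = c(-165, 165), ylim = c(-70, 6)) +
  theme_bw()
mymap_binned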

Creating a heatmap based on values in R

I am trying to generate a heatmap based on values.
Here is my dataset, which consists of three variables: Lat (latitude), Lon (longitude), and Value.
https://www.dropbox.com/s/s53xeplywz9jh15/sample_data.csv?dl=0
I have looked through the relevant posts and found this useful:
Generating spatial heat map via ggmap in R based on a value
I copied the code from that post and here is what my code looks like:
# import data and libraries
library(ggplot2)
library(ggmap)
Yunan<-read.csv("C:\\Program Files\\RStudio\\data\\pb_sp\\sample_data.csv", header = TRUE)
# call the map to see point distribution
Yunan_map<-get_map(location="yunan",zoom=6,maptype="terrain",scale=2)
ggmap(Yunan_map)+geom_point(data=Yunan,aes(x=Yunan$Lon,y=Yunan$Lat,fill="red",alpha=0.3,size=0.05,shape=21))+scale_shape_identity()
# 1. generate bins for x, y coordinates (unit=decimal degree)
xbreaks <- seq(floor(min(Yunan$Lat,na.rm=TRUE)), ceiling(max(Yunan$Lat,na.rm=TRUE)), by = 0.5)
ybreaks <- seq(floor(min(Yunan$Lon,na.rm=TRUE)), ceiling(max(Yunan$Lon,na.rm=TRUE)), by = 0.5)
# 2. allocate the data points into the bins
Yunan$latbin <- xbreaks[cut(Yunan$Lat, breaks = xbreaks, labels=F)]
Yunan$longbin <- ybreaks[cut(Yunan$Lon, breaks = ybreaks, labels=F)]
# 3. summarise the data for each bin (use the median)
datamat <- Yunan[, list(Value= median(Value)),
by = c("latbin", "longbin" )]
# 4. Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# 5. Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(Value), ]$Value <- 0
# 6. Plot the contours
ggmap(Yunan_map,extent ="device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = Value,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 30) +
scale_fill_gradient(name = "Value", low = "green", high = "red") +
guides(alpha = FALSE)
However, I encountered two problems:
1. After executing step 3 (summarise the data for each bin), I got this error message:
Error in [.data.frame(Yunan, , list(Value = median(Value)), by = c("latbin", :
unused argument (by = c("latbin", "longbin"))
2. I wish to change the colour scheme from a gradient to discrete colours, something like this map:
Since the values in my dataset range from 17 to 21, I want to classify them into bins such as 17-17.5, 17.5-18, 18-18.5, ... with corresponding colours.
Any suggestions on how I can fix these problems? Thanks in advance.
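A minimal sketch of how both problems could be addressed, reusing Yunan, Yunan_map, latbin and longbin from the code above (this is not from the linked post; the switch from stat_contour to geom_tile and the RdYlGn palette are my own choices):
library(ggmap)
library(data.table)

# Problem 1: the `[, list(...), by = ...]` syntax needs a data.table, not a data.frame
Yunan <- setDT(Yunan)
datamat <- Yunan[, list(Value = median(Value)), by = c("latbin", "longbin")]

# Problem 2: bin the values into 0.5-wide classes (17-17.5, 17.5-18, ...) and
# map the resulting factor with a discrete fill scale
datamat$Value_bin <- cut(datamat$Value, breaks = seq(17, 21, by = 0.5),
                         include.lowest = TRUE)

ggmap(Yunan_map) +
  geom_tile(data = datamat, aes(x = longbin, y = latbin, fill = Value_bin),
            alpha = 0.6) +
  scale_fill_brewer(palette = "RdYlGn", direction = -1, name = "Value")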

Generating spatial heat map via ggmap in R based on a value

I'd like to generate a choropleth map using the following data points:
Longitude
Latitude
Price
Here is the dataset - https://www.dropbox.com/s/0s05cl34bko7ggm/sample_data.csv?dl=0.
I would like the map to show the areas where the price is higher and where it is lower. It should probably look something like this (sample image):
Here is my code:
library(ggmap)
map <- get_map(location = "austin", zoom = 9)
data <- read.csv(file.choose(), stringsAsFactors = FALSE)
data$average_rate_per_night <- as.numeric(gsub("[\\$,]", "",
data$average_rate_per_night))
ggmap(map, extent = "device") +
stat_contour( data = data, geom="polygon",
aes( x = longitude, y = latitude, z = average_rate_per_night,
fill = ..level.. ) ) +
scale_fill_continuous( name = "Price", low = "yellow", high = "red" )
I'm getting the following error message:
2: Computation failed in `stat_contour()`:
Contour requires single `z` at each combination of `x` and `y`.
I'd really appreciate any help on how this can be fixed or any other method to generate this type of heatmap. Please note that I'm interested in the weight of the price, not density of the records.
If you insist on using the contour approach then you need to provide a value for every possible x, y coordinate combination in your data. To achieve this I would highly recommend gridding the space and generating some summary statistics per bin.
I attach a working example below based on the data you provided:
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
# convert the rate from string into numbers
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
# generate bins for the x, y coordinates
xbreaks <- seq(floor(min(data$latitude)), ceiling(max(data$latitude)), by = 0.01)
ybreaks <- seq(floor(min(data$longitude)), ceiling(max(data$longitude)), by = 0.01)
# allocate the data points into the bins
data$latbin <- xbreaks[cut(data$latitude, breaks = xbreaks, labels=F)]
data$longbin <- ybreaks[cut(data$longitude, breaks = ybreaks, labels=F)]
# Summarise the data for each bin
datamat <- data[, list(average_rate_per_night = mean(average_rate_per_night)),
by = c("latbin", "longbin")]
# Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(average_rate_per_night), ]$average_rate_per_night <- 0
# Plot the contours
ggmap(map, extent = "device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = average_rate_per_night,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 100) +
scale_fill_gradient(name = "Price", low = "green", high = "red") +
guides(alpha = FALSE)
You can then play around with the bin size and the contour binwidth to get the desired result but you could additionally apply a smoothing function on the grid to get an even smoother contour plot.
You could use the stat_summary_2d() or stat_summary_hex() function to achieve a similar result. These functions divide the data into bins (defined by x and y), and then the z values for each bin are summarised based on a given function. In the example below I have selected mean as an aggregation function and the map basically shows the average price in each bin.
Note: I needed to treat your average_rate_per_night variable appropriately in order to convert it into numbers (removed the $ sign and the comma).
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
ggmap(map, extent = "device") +
stat_summary_2d(data = data, aes(x = longitude, y = latitude,
z = average_rate_per_night), fun = mean, alpha = 0.6, bins = 30) +
scale_fill_gradient(name = "Price", low = "green", high = "red")
