How to add diagonal lines in NA value polygons using ggplot? - r

I'm working to plot the consolidated Z-value deviations (for a series of factors) from the national average for Pakistan on a fortified SPDF. For the purposes of this question, my data is irrelevant. I could provide it if necessary.
I am using ggplot to create my output where the command and result look something like this:
ggplot() + geom_polygon(data = plot.pakmod_sumZ, aes(x = long, y = lat, group = group, fill = SumZ.Cat), color = "black", size = 0.25, na.rm = TRUE) + scale_fill_manual(name = "Deviations from National Average", labels = c("-7", "-6", "-5", "-4", "-3", "-2", "-1", "Positive"), values = c("darkorange4","brown", "orangered1","tomato1","darkorange3","orange","yellow", "greenyellow"), na.value = "Grey", guide = guide_legend(reverse = TRUE)) + coord_map() + labs(x = NULL, y = NULL) + scale_x_discrete(breaks = NULL) + scale_y_discrete(breaks = NULL) + theme_minimal()
Deviations from National Average
I am trying to figure out now if it's possible to add diagonal lines in the polygons which have missing values and are coloured grey. Can this be done using ggplot?

This is an example I took from here. I opted to use the horizontal error bar geom. Mind that this isn't the only way of doing this.
library(ggplot2)
library(sp)
library(rgdal)
library(rgeos)
# create a local directory for the data
localDir <- "R_GIS_data"
if (!file.exists(localDir)) {
dir.create(localDir)
}
# download and unzip the data
url <- "ftp://www.ecy.wa.gov/gis_a/inlandWaters/wria.zip"
file <- paste(localDir, basename(url), sep='/')
if (!file.exists(file)) {
download.file(url, file)
unzip(file,exdir=localDir)
}
# create a layer name for the shapefiles (text before file extension)
layerName <- "WRIA_poly"
# read data into a SpatialPolygonsDataFrame object
dataProjected <- readOGR(dsn=localDir, layer=layerName)
dataProjected#data$id <- rownames(dataProjected#data)
# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected)
# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
dataProjected#data$id <- rownames(dataProjected#data)
watershedPoints <- fortify(dataProjected)
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
ggWatershed <- ggplot(data = watershedDF, aes(x=long, y=lat, group = group, fill = WRIA_NM)) +
geom_polygon() +
geom_path(color = "white") +
scale_fill_hue(l = 40) +
coord_equal() +
theme(legend.position = "none", title = element_blank())
# Adding coordinates to the data part of SPDF. `sd` is the variable of interest
# which is beign plotted here. Each line extends sd away from long coordinate
dataProjected#data$sd <- rnorm(nrow(xy), mean = 50000, sd = 10000)
xy <- coordinates(dataProjected)
dataProjected#data$long <- xy[, 1]
dataProjected#data$lat <- xy[, 2]
ggWatershed +
geom_errorbarh(data = dataProjected#data, aes(group = id, xmin = long - sd, xmax = long + sd))

Related

Spatial network based on maximum distance in ggplot2

I would like to plot network matrix of regions in ggplot - I know that for ggplot we need data.frame in tidy format in order to plot it.
I am able to plot network based on number of neighbours in ggplot however when I need spatial network based on maximum distance I get an error when creating data frame for ggplot.
I provided example down bellow:
library(ggplot2)
library(sf)
library(spdep)
# Polygon data
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_DEU_1_sp.rds"
data <- readRDS(url(URL))
CORD <- rbind(
coordinates(data)
)
rownames(CORD) <- NULL
# Spatial Network based on number of neighbours
cns <- knearneigh(CORD, k = 5, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cS <- nb2listw(scnsn)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
# Creating dataframe from spatail network (neiresth neighbours) for ggplot plot
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS$neighbours,length)),
to = unlist(cS$neighbours),
weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = FALSE) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
### Another type of network matrix - Maximum distance
nb200km <- dnearneigh(CORD, d1=0, d2=100, longlat=T)
summary(nb200km)
cS_distance <- nb2listw(nb200km, zero.policy = T)
# I need to recreate this plot in ggplot
plot(data)
plot(W, coordinates(data), add = T)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
n = length(attributes(cS_distance$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS_distance$neighboaurs,length)),
to = unlist(cS_distance$neighbours),
weight = unlist(cS_distance$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
creating dataframe from cS object works, however creating a dataframe from cS_distance object returns an error.
I would like to ask how to solve the error and plot distance spatial network in ggplot.
I'm not sure if this is what you're looking for, but the problem seems to be that you have some regions with no neighbours in cS_distance, so DA$to contains some zero values. This means when you do data_df[DA$from,] it has more rows than data_df[DA$to,], and your code throws an error when you try to cbind them.
If you filter out the rows where DA$to is zero, you get this:
n = length(attributes(cS_distance$neighbours)$region.id)
from <- rep(1:n,sapply(cS_distance$neighbours,length))
to <- unlist(cS_distance$neighbours)[]
weight <- numeric(length(to))
weight[which(to != 0)] <- unlist(cS_distance$weights)
DA = data.frame(from = from, to = to, weight = weight)
DA <- DA[DA$to != 0,]
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = NA) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()

Create shaded polygons around points with ggplot2

I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()

Generating spatial heat map via ggmap in R based on a value

I'd like to generate a choropleth map using the following data points:
Longitude
Latitude
Price
Here is the dataset - https://www.dropbox.com/s/0s05cl34bko7ggm/sample_data.csv?dl=0.
I would like the map to show the areas where the price is higher and the where price is lower. It should most probably look like this (sample image):
Here is my code:
library(ggmap)
map <- get_map(location = "austin", zoom = 9)
data <- read.csv(file.choose(), stringsAsFactors = FALSE)
data$average_rate_per_night <- as.numeric(gsub("[\\$,]", "",
data$average_rate_per_night))
ggmap(map, extent = "device") +
stat_contour( data = data, geom="polygon",
aes( x = longitude, y = latitude, z = average_rate_per_night,
fill = ..level.. ) ) +
scale_fill_continuous( name = "Price", low = "yellow", high = "red" )
I'm getting the following error message:
2: Computation failed in `stat_contour()`:
Contour requires single `z` at each combination of `x` and `y`.
I'd really appreciate any help on how this can be fixed or any other method to generate this type of heatmap. Please note that I'm interested in the weight of the price, not density of the records.
If you insist on using the contour approach then you need to provide a value for every possible x,y coordinate combination you have in your data. To achieve this I would highly recommend to grid the space and generate some summary statistics per bin.
I attach a working example below based on the data you provided:
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
# convert the rate from string into numbers
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
# generate bins for the x, y coordinates
xbreaks <- seq(floor(min(data$latitude)), ceiling(max(data$latitude)), by = 0.01)
ybreaks <- seq(floor(min(data$longitude)), ceiling(max(data$longitude)), by = 0.01)
# allocate the data points into the bins
data$latbin <- xbreaks[cut(data$latitude, breaks = xbreaks, labels=F)]
data$longbin <- ybreaks[cut(data$longitude, breaks = ybreaks, labels=F)]
# Summarise the data for each bin
datamat <- data[, list(average_rate_per_night = mean(average_rate_per_night)),
by = c("latbin", "longbin")]
# Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(average_rate_per_night), ]$average_rate_per_night <- 0
# Plot the contours
ggmap(map, extent = "device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = average_rate_per_night,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 100) +
scale_fill_gradient(name = "Price", low = "green", high = "red") +
guides(alpha = FALSE)
You can then play around with the bin size and the contour binwidth to get the desired result but you could additionally apply a smoothing function on the grid to get an even smoother contour plot.
You could use the stat_summary_2d() or stat_summary_hex() function to achieve a similar result. These functions divide the data into bins (defined by x and y), and then the z values for each bin are summarised based on a given function. In the example below I have selected mean as an aggregation function and the map basically shows the average price in each bin.
Note: I needed to treat your average_rate_per_night variable appropriately in order to convert it into numbers (removed the $ sign and the comma).
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
ggmap(map, extent = "device") +
stat_summary_2d(data = data, aes(x = longitude, y = latitude,
z = average_rate_per_night), fun = mean, alpha = 0.6, bins = 30) +
scale_fill_gradient(name = "Price", low = "green", high = "red")

Overlay region borders in R map generates unwanted lines

I have already generated a simple map for Nigerian states, and now I would like to highlight in my map the borders for the Nigerian regions (that group Nigerian states).
When I add the layer for the borders with geom_polygon, they appear lines that do not correspond to region borders. I found a similar problem here Map county borders on to a ggmap
but this does not seem to be working for my case.
Here are the shapefiles and the database I am working on:
https://www.dropbox.com/sh/cek92s50jixowfx/AABwIVZKvtff8-9slhfCbxEca?dl=0
The code I am using is
#LOAD SHAPEFILES AND DATABASE
ng_dist <- readShapeSpatial("NGA_adm1.shp")
ng_dist_regions <- readShapeSpatial("NGA_adm_Region.shp")
NG_States <- read.csv("State_color_map.csv", header = TRUE, sep=",")
#VERIFY THE MAPS LOADED PROPERLY
plot(ng_dist)
plot(ng_dist_regions)
# STATE MAP - fortify and match shapefile and database IDs names
intersect(ng_dist$NAME_1, NG_States$STATE)
ng_dist <- fortify(ng_dist, region = "NAME_1")
ng_dist$id[which(ng_dist$id == "Akwa Ibom")] <- "Akwa-ibom"
ng_dist$id[which(ng_dist$id == "Nassarawa")] <- "Nasarawa"
ng_dist$id[which(ng_dist$id == "Cross River")] <- "C/river"
ng_dist$id[which(ng_dist$id == "Federal Capital Territory")] <- "FCT"
intersect(ng_dist$id, NG_States$STATE)
# REGION MAP - fortify
ng_dist_regions <- fortify(ng_dist_regions, region = "Region")
### Convert dummy variable to factor
NG_States$Abia <- as.factor(NG_States$Abia)
#PLOT MAP with coloured Abia State
cols <- c("0" = "#e6e6e6","1" = "#6eab27")
ABIA <- NG_States$Abia
Abia_map <- ggplot(NG_States, aes(fill = ABIA)) +
geom_map(data = NG_States, aes(map_id = NG_States$STATE, fill = ABIA), map = ng_dist, color = "black", size = 0.10) +
expand_limits(x = ng_dist$long, y = ng_dist$lat) +
theme_nothing(legend = FALSE) +
labs(title="Abia") +
coord_map() +
scale_fill_manual(name="", values=cols, labels=c("","Abia"))
Abia_map
#Add layer for region borders
d <- Abia_map +
geom_polygon(aes(x = ng_dist_regions$long, y = ng_dist_regions$lat, group = ng_dist_regions$id, fill = NA), data = ng_dist_regions, color = "red", size = 0.8)
d
Here is my result
Nigerian States and Regions
I have tried to add other options, such as coord_fixed() or expand_limits(x = ng_dist_regions$long, y = ng_dist_regions$lat), but I am quite basic R user and I don't know other solutions.
Using group, instead of id as group seems to solve the problem.
d <- Abia_map +
geom_path(aes(x = long, y = lat, group = group), data = ng_dist_regions, color = "red", size = 0.8, inherit.aes = FALSE)
d

Choropleth Map in ggplot2

I'm trying to reproduce the Choropleth Map given here with the code provided by Hadley.
library(ggplot2)
library(maps)
# First (and most annoying) task - get matching state and county variables
# for both datasets. And unfortauntely it's not quite right, as you can
# see from the finish product - some counties are missing.
unemp <- read.csv("unemployment09.csv", header = F, stringsAsFactors = F)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
"?", "?", "?", "rate")
unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name))
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)
county_df <- map_data("county")
names(county_df) <- c("long", "lat", "group", "order", "state_name", "county")
county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))]
county_df$state_name <- NULL
state_df <- map_data("state")
# Combine together
choropleth <- merge(county_df, unemp, by = c("state", "county"))
choropleth <- choropleth[order(choropleth$order), ]
# Discretise rate to use with Brewer colour scheme - many options here
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's choice is a little odd:
choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35))
# Once you have the data in the right format, recreating the plot is straight
# forward.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
But this code gives the following error:
Error in do.call("layer", list(mapping = mapping, data = data, stat = stat, :
could not find function "alpha"
Deleting alpha and using this code
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = "white", size = 0.2) +
geom_polygon(data = state_df, colour = "white", fill = NA) +
scale_fill_brewer(pal = "PuRd")
gives the following error:
Error in scale_map.discrete(scale, df[[j]]) : attempt to apply
non-function
How can I fix this problem?

Resources