I would like to plot network matrix of regions in ggplot - I know that for ggplot we need data.frame in tidy format in order to plot it.
I am able to plot network based on number of neighbours in ggplot however when I need spatial network based on maximum distance I get an error when creating data frame for ggplot.
I provided example down bellow:
library(ggplot2)
library(sf)
library(spdep)
# Polygon data
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_DEU_1_sp.rds"
data <- readRDS(url(URL))
CORD <- rbind(
coordinates(data)
)
rownames(CORD) <- NULL
# Spatial Network based on number of neighbours
cns <- knearneigh(CORD, k = 5, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cS <- nb2listw(scnsn)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
# Creating dataframe from spatail network (neiresth neighbours) for ggplot plot
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS$neighbours,length)),
to = unlist(cS$neighbours),
weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = FALSE) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
### Another type of network matrix - Maximum distance
nb200km <- dnearneigh(CORD, d1=0, d2=100, longlat=T)
summary(nb200km)
cS_distance <- nb2listw(nb200km, zero.policy = T)
# I need to recreate this plot in ggplot
plot(data)
plot(W, coordinates(data), add = T)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
n = length(attributes(cS_distance$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS_distance$neighboaurs,length)),
to = unlist(cS_distance$neighbours),
weight = unlist(cS_distance$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
creating dataframe from cS object works, however creating a dataframe from cS_distance object returns an error.
I would like to ask how to solve the error and plot distance spatial network in ggplot.
I'm not sure if this is what you're looking for, but the problem seems to be that you have some regions with no neighbours in cS_distance, so DA$to contains some zero values. This means when you do data_df[DA$from,] it has more rows than data_df[DA$to,], and your code throws an error when you try to cbind them.
If you filter out the rows where DA$to is zero, you get this:
n = length(attributes(cS_distance$neighbours)$region.id)
from <- rep(1:n,sapply(cS_distance$neighbours,length))
to <- unlist(cS_distance$neighbours)[]
weight <- numeric(length(to))
weight[which(to != 0)] <- unlist(cS_distance$weights)
DA = data.frame(from = from, to = to, weight = weight)
DA <- DA[DA$to != 0,]
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = NA) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
Related
I have spatial coordinates in a data frame where each row (Longitude, Latitude) corresponds to the occurrence of an event I am following. I tried to map these data but instead of using points, I want to create a grid with cells of a resolution of 5 nautical miles (~ 0.083333) and count the number of occurrences of the event is each cell and plot it.
This is the code I came to write with the help of some resources. But it doesn't look the way I expected it to be. Can you figure out what's I'm doing wrong? I attached the raw positions and the resulting map I get.
Here is the link to the data.
re_pi = read.csv(file = "~/Desktop/Events.csv")
gridx <- seq(from=-19,to=-10,by=0.083333)
gridy <- seq(from=20,to=29,by=0.083333)
xcell <- unlist(lapply(re_pi$LON,function(x) min(which(gridx>x))))
ycell <- unlist(lapply(re_pi$LAT,function(y) min(which(gridy>y))))
re_pi$cell <- (length(gridx) - 1) * ycell + xcell
rr = re_pi %>%
group_by(cell)%>%
summarise(Lat = mean(LAT),Lon = mean(LON),Freq = length(cell))
my_theme <- theme_bw() + theme(panel.ontop=TRUE, panel.background=element_blank())
my_cols <- scale_color_distiller(palette='Spectral')
my_fill <- scale_fill_distiller(palette='Spectral')
ggplot(rr, aes(y=Lat, x=Lon, fill=Effort)) + geom_tile(width=1.2, height=1.2) +
borders('world', xlim=range(rr$Lon), ylim=range(rr$Lat), colour='black') + my_theme + my_fill +
coord_quickmap(xlim=range(rr$Lon), ylim=range(rr$Lat))
Nice dataset, assume these are fishing vessel VMS data. Here may be one way to achieve your objective, heavily reliant on the tidyverse and by-passing raster and shapes.
library(tidyverse)
library(mapdata) # higher resolution maps
# poor man's gridding function
grade <- function (x, dx) {
if (dx > 1)
warning("Not tested for grids larger than one")
brks <- seq(floor(min(x)), ceiling(max(x)), dx)
ints <- findInterval(x, brks, all.inside = TRUE)
x <- (brks[ints] + brks[ints + 1])/2
return(x)
}
d <-
read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
janitor::clean_names() %>%
# make a grid 0.01 x 0.01 longitude / latitude
mutate(lon = grade(lon, 0.01),
lat = grade(lat, 0.01)) %>%
group_by(lon, lat) %>%
count() %>%
# not much happening south of 21 and north of 26
filter(between(lat, 21, 26.25))
d %>%
ggplot() +
theme_bw() +
geom_tile(aes(lon, lat, fill = n)) +
scale_fill_viridis_c(option = "B", direction = -1) +
# only data within the data-bounds
borders(database = "worldHires",
xlim = range(d$lon), ylim = range(d$lat),
fill = "grey") +
labs(x = NULL, y = NULL, fill = "Effort") +
# limit plot
coord_quickmap(xlim = range(d$lon), ylim = range(d$lat)) +
# legends within plot
theme(legend.position = c(0.77, 0.26))
Here is my attempt using the sf package. First I imported your data and converted it to an sf object. Then, I created another sf object which includes the grids. I used the raster package and the sf package in order to create the grids. Once I had the two sf object, I counted how many data points exist in each grid and added the results as a new column in foo. Finally, I drew a graphic.
library(tidyverse)
library(sf)
library(raster)
library(viridis)
# Import the data and convert it to an sf object
mydata <- read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
st_as_sf(coords = c("LON", "LAT"),
crs = 4326, agr = "constant")
# Create an sf object for the grid
gridx <- seq(from = -19,to = -10, by = 0.083333)
gridy <- seq(from = 20,to = 29, by = 0.083333)
foo <- raster(xmn = -19, xmx = -10,
ymn = 20, ymx = 29,
nrows = length(gridx),
ncols = length(gridy)) %>%
rasterToPolygons() %>%
st_as_sf(crs = 4326) %>%
mutate(group = 1:(length(gridx)*length(gridy))) %>%
st_cast("MULTIPOLYGON")
# Now count how many data points exist in each grid
mutate(foo,
count = lengths(st_intersects(x = foo, y = mydata))) -> foo
# Draw a graphic
ggplot() +
geom_sf(data = foo, aes(fill = count)) +
scale_fill_viridis(option = "D") -> g
I would like to reproduce plot of spatial dependency of regions in ggplot2 rather then using basic plot in R
I provided reproduceble example in code below:
I followed example: Plotting neighborhoods network to a ggplot maps
library(leaflet)
library(ggplot2)
library(sf)
library(spdep)
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_CZE_1_sp.rds"
data <- readRDS(url(URL))
ggplot() +
geom_polygon(data = data, aes(x=long, y = lat, group = group), color = "black", fill = F)
cns <- knearneigh(coordinates(data), k = 3, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cns
scnsn
cS <- nb2listw(scnsn)
summary(cS)
# Plot of regions and k-nn neighthorhours matrix
plot(data)
plot(cS, coordinates(data), add = T)
I am asking how to reproduce Plot of regions and k-nn neighthorhours matrix using ggplot.
I know we have to retrive each point input and then use geom_segment, however I dont know how to retrive it from cS object.
The other SO post you are refering contains all steps you need to follow to get your plot (thanks to the great answer from #StupidWolf).
Basically, you need to extract the different segment using:
1) Transform coordinates of data in a dataframe, it will facilitate its use later:
data_df <- data.frame(coordinates(data))
colnames(data_df) <- c("long", "lat")
This data_df contains now all x,y values for plotting points.
2) Now, we can retrieve segments informations from the cS object using:
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS$neighbours,length)),
to = unlist(cS$neighbours),
weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
In the DA dataframe, you have all informations required to draw each segments
3) Finally, you can put plot every parts:
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "black", fill = FALSE)+
geom_point(data = data_df, aes(x= long, y = lat), size = 1)+
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5)
Again, the solution provided by #StupidWolf was pretty well written and understandable, so I don't know why you were not able to reproduce it.
I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()
I'd like to generate a choropleth map using the following data points:
Longitude
Latitude
Price
Here is the dataset - https://www.dropbox.com/s/0s05cl34bko7ggm/sample_data.csv?dl=0.
I would like the map to show the areas where the price is higher and the where price is lower. It should most probably look like this (sample image):
Here is my code:
library(ggmap)
map <- get_map(location = "austin", zoom = 9)
data <- read.csv(file.choose(), stringsAsFactors = FALSE)
data$average_rate_per_night <- as.numeric(gsub("[\\$,]", "",
data$average_rate_per_night))
ggmap(map, extent = "device") +
stat_contour( data = data, geom="polygon",
aes( x = longitude, y = latitude, z = average_rate_per_night,
fill = ..level.. ) ) +
scale_fill_continuous( name = "Price", low = "yellow", high = "red" )
I'm getting the following error message:
2: Computation failed in `stat_contour()`:
Contour requires single `z` at each combination of `x` and `y`.
I'd really appreciate any help on how this can be fixed or any other method to generate this type of heatmap. Please note that I'm interested in the weight of the price, not density of the records.
If you insist on using the contour approach then you need to provide a value for every possible x,y coordinate combination you have in your data. To achieve this I would highly recommend to grid the space and generate some summary statistics per bin.
I attach a working example below based on the data you provided:
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
# convert the rate from string into numbers
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
# generate bins for the x, y coordinates
xbreaks <- seq(floor(min(data$latitude)), ceiling(max(data$latitude)), by = 0.01)
ybreaks <- seq(floor(min(data$longitude)), ceiling(max(data$longitude)), by = 0.01)
# allocate the data points into the bins
data$latbin <- xbreaks[cut(data$latitude, breaks = xbreaks, labels=F)]
data$longbin <- ybreaks[cut(data$longitude, breaks = ybreaks, labels=F)]
# Summarise the data for each bin
datamat <- data[, list(average_rate_per_night = mean(average_rate_per_night)),
by = c("latbin", "longbin")]
# Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(average_rate_per_night), ]$average_rate_per_night <- 0
# Plot the contours
ggmap(map, extent = "device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = average_rate_per_night,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 100) +
scale_fill_gradient(name = "Price", low = "green", high = "red") +
guides(alpha = FALSE)
You can then play around with the bin size and the contour binwidth to get the desired result but you could additionally apply a smoothing function on the grid to get an even smoother contour plot.
You could use the stat_summary_2d() or stat_summary_hex() function to achieve a similar result. These functions divide the data into bins (defined by x and y), and then the z values for each bin are summarised based on a given function. In the example below I have selected mean as an aggregation function and the map basically shows the average price in each bin.
Note: I needed to treat your average_rate_per_night variable appropriately in order to convert it into numbers (removed the $ sign and the comma).
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
ggmap(map, extent = "device") +
stat_summary_2d(data = data, aes(x = longitude, y = latitude,
z = average_rate_per_night), fun = mean, alpha = 0.6, bins = 30) +
scale_fill_gradient(name = "Price", low = "green", high = "red")
I'm working to plot the consolidated Z-value deviations (for a series of factors) from the national average for Pakistan on a fortified SPDF. For the purposes of this question, my data is irrelevant. I could provide it if necessary.
I am using ggplot to create my output where the command and result look something like this:
ggplot() + geom_polygon(data = plot.pakmod_sumZ, aes(x = long, y = lat, group = group, fill = SumZ.Cat), color = "black", size = 0.25, na.rm = TRUE) + scale_fill_manual(name = "Deviations from National Average", labels = c("-7", "-6", "-5", "-4", "-3", "-2", "-1", "Positive"), values = c("darkorange4","brown", "orangered1","tomato1","darkorange3","orange","yellow", "greenyellow"), na.value = "Grey", guide = guide_legend(reverse = TRUE)) + coord_map() + labs(x = NULL, y = NULL) + scale_x_discrete(breaks = NULL) + scale_y_discrete(breaks = NULL) + theme_minimal()
Deviations from National Average
I am trying to figure out now if it's possible to add diagonal lines in the polygons which have missing values and are coloured grey. Can this be done using ggplot?
This is an example I took from here. I opted to use the horizontal error bar geom. Mind that this isn't the only way of doing this.
library(ggplot2)
library(sp)
library(rgdal)
library(rgeos)
# create a local directory for the data
localDir <- "R_GIS_data"
if (!file.exists(localDir)) {
dir.create(localDir)
}
# download and unzip the data
url <- "ftp://www.ecy.wa.gov/gis_a/inlandWaters/wria.zip"
file <- paste(localDir, basename(url), sep='/')
if (!file.exists(file)) {
download.file(url, file)
unzip(file,exdir=localDir)
}
# create a layer name for the shapefiles (text before file extension)
layerName <- "WRIA_poly"
# read data into a SpatialPolygonsDataFrame object
dataProjected <- readOGR(dsn=localDir, layer=layerName)
dataProjected#data$id <- rownames(dataProjected#data)
# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected)
# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
dataProjected#data$id <- rownames(dataProjected#data)
watershedPoints <- fortify(dataProjected)
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
ggWatershed <- ggplot(data = watershedDF, aes(x=long, y=lat, group = group, fill = WRIA_NM)) +
geom_polygon() +
geom_path(color = "white") +
scale_fill_hue(l = 40) +
coord_equal() +
theme(legend.position = "none", title = element_blank())
# Adding coordinates to the data part of SPDF. `sd` is the variable of interest
# which is beign plotted here. Each line extends sd away from long coordinate
dataProjected#data$sd <- rnorm(nrow(xy), mean = 50000, sd = 10000)
xy <- coordinates(dataProjected)
dataProjected#data$long <- xy[, 1]
dataProjected#data$lat <- xy[, 2]
ggWatershed +
geom_errorbarh(data = dataProjected#data, aes(group = id, xmin = long - sd, xmax = long + sd))