I'd like to generate a choropleth map using the following data points:
Longitude
Latitude
Price
Here is the dataset - https://www.dropbox.com/s/0s05cl34bko7ggm/sample_data.csv?dl=0.
I would like the map to show the areas where the price is higher and the where price is lower. It should most probably look like this (sample image):
Here is my code:
library(ggmap)
map <- get_map(location = "austin", zoom = 9)
data <- read.csv(file.choose(), stringsAsFactors = FALSE)
data$average_rate_per_night <- as.numeric(gsub("[\\$,]", "",
data$average_rate_per_night))
ggmap(map, extent = "device") +
stat_contour( data = data, geom="polygon",
aes( x = longitude, y = latitude, z = average_rate_per_night,
fill = ..level.. ) ) +
scale_fill_continuous( name = "Price", low = "yellow", high = "red" )
I'm getting the following error message:
2: Computation failed in `stat_contour()`:
Contour requires single `z` at each combination of `x` and `y`.
I'd really appreciate any help on how this can be fixed or any other method to generate this type of heatmap. Please note that I'm interested in the weight of the price, not density of the records.
If you insist on using the contour approach then you need to provide a value for every possible x,y coordinate combination you have in your data. To achieve this I would highly recommend to grid the space and generate some summary statistics per bin.
I attach a working example below based on the data you provided:
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
# convert the rate from string into numbers
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
# generate bins for the x, y coordinates
xbreaks <- seq(floor(min(data$latitude)), ceiling(max(data$latitude)), by = 0.01)
ybreaks <- seq(floor(min(data$longitude)), ceiling(max(data$longitude)), by = 0.01)
# allocate the data points into the bins
data$latbin <- xbreaks[cut(data$latitude, breaks = xbreaks, labels=F)]
data$longbin <- ybreaks[cut(data$longitude, breaks = ybreaks, labels=F)]
# Summarise the data for each bin
datamat <- data[, list(average_rate_per_night = mean(average_rate_per_night)),
by = c("latbin", "longbin")]
# Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(average_rate_per_night), ]$average_rate_per_night <- 0
# Plot the contours
ggmap(map, extent = "device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = average_rate_per_night,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 100) +
scale_fill_gradient(name = "Price", low = "green", high = "red") +
guides(alpha = FALSE)
You can then play around with the bin size and the contour binwidth to get the desired result but you could additionally apply a smoothing function on the grid to get an even smoother contour plot.
You could use the stat_summary_2d() or stat_summary_hex() function to achieve a similar result. These functions divide the data into bins (defined by x and y), and then the z values for each bin are summarised based on a given function. In the example below I have selected mean as an aggregation function and the map basically shows the average price in each bin.
Note: I needed to treat your average_rate_per_night variable appropriately in order to convert it into numbers (removed the $ sign and the comma).
library(ggmap)
library(data.table)
map <- get_map(location = "austin", zoom = 12)
data <- setDT(read.csv(file.choose(), stringsAsFactors = FALSE))
data[, average_rate_per_night := as.numeric(gsub(",", "",
substr(average_rate_per_night, 2, nchar(average_rate_per_night))))]
ggmap(map, extent = "device") +
stat_summary_2d(data = data, aes(x = longitude, y = latitude,
z = average_rate_per_night), fun = mean, alpha = 0.6, bins = 30) +
scale_fill_gradient(name = "Price", low = "green", high = "red")
Related
I would like to plot network matrix of regions in ggplot - I know that for ggplot we need data.frame in tidy format in order to plot it.
I am able to plot network based on number of neighbours in ggplot however when I need spatial network based on maximum distance I get an error when creating data frame for ggplot.
I provided example down bellow:
library(ggplot2)
library(sf)
library(spdep)
# Polygon data
URL <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_DEU_1_sp.rds"
data <- readRDS(url(URL))
CORD <- rbind(
coordinates(data)
)
rownames(CORD) <- NULL
# Spatial Network based on number of neighbours
cns <- knearneigh(CORD, k = 5, longlat=T)
scnsn <- knn2nb(cns, row.names = NULL, sym = T)
cS <- nb2listw(scnsn)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
# Creating dataframe from spatail network (neiresth neighbours) for ggplot plot
n = length(attributes(cS$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS$neighbours,length)),
to = unlist(cS$neighbours),
weight = unlist(cS$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = FALSE) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
### Another type of network matrix - Maximum distance
nb200km <- dnearneigh(CORD, d1=0, d2=100, longlat=T)
summary(nb200km)
cS_distance <- nb2listw(nb200km, zero.policy = T)
# I need to recreate this plot in ggplot
plot(data)
plot(W, coordinates(data), add = T)
data_df <- data.frame(CORD)
colnames(data_df) <- c("long", "lat")
n = length(attributes(cS_distance$neighbours)$region.id)
DA = data.frame(
from = rep(1:n,sapply(cS_distance$neighboaurs,length)),
to = unlist(cS_distance$neighbours),
weight = unlist(cS_distance$weights)
)
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
creating dataframe from cS object works, however creating a dataframe from cS_distance object returns an error.
I would like to ask how to solve the error and plot distance spatial network in ggplot.
I'm not sure if this is what you're looking for, but the problem seems to be that you have some regions with no neighbours in cS_distance, so DA$to contains some zero values. This means when you do data_df[DA$from,] it has more rows than data_df[DA$to,], and your code throws an error when you try to cbind them.
If you filter out the rows where DA$to is zero, you get this:
n = length(attributes(cS_distance$neighbours)$region.id)
from <- rep(1:n,sapply(cS_distance$neighbours,length))
to <- unlist(cS_distance$neighbours)[]
weight <- numeric(length(to))
weight[which(to != 0)] <- unlist(cS_distance$weights)
DA = data.frame(from = from, to = to, weight = weight)
DA <- DA[DA$to != 0,]
DA = cbind(DA, data_df[DA$from,], data_df[DA$to,])
colnames(DA)[4:7] = c("long","lat","long_to","lat_to")
# ggplot of spatial network
ggplot(data, aes(x = long, y =lat))+
geom_polygon(aes(group = group), color = "red", fill = NA) +
geom_segment(data = DA, aes(xend = long_to, yend = lat_to), size=0.5, color = "royalblue") +
coord_map()
I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()
I try to generate a heatmap based on values.
Here is my dataset which consists of three variables: Lat (latitude), Lon (longitude), and Value.
https://www.dropbox.com/s/s53xeplywz9jh15/sample_data.csv?dl=0
I have looked through the relevant posts and found this useful:
Generating spatial heat map via ggmap in R based on a value
I copied the code in that post and here my code looks like:
# import data and libaries
library(ggplot2)
library(ggmap)
Yunan<-read.csv("C:\\Program Files\\RStudio\\data\\pb_sp\\sample_data.csv", header = TRUE)
# call the map to see point distribution
Yunan_map<-get_map(location="yunan",zoom=6,maptype="terrain",scale=2)
ggmap(Yunan_map)+geom_point(data=Yunan,aes(x=Yunan$Lon,y=Yunan$Lat,fill="red",alpha=0.3,size=0.05,shape=21))+scale_shape_identity()
# 1. generate bins for x, y coordinates (unit=decimal degree)
xbreaks <- seq(floor(min(Yunan$Lat,na.rm=TRUE)), ceiling(max(Yunan$Lat,na.rm=TRUE)), by = 0.5)
ybreaks <- seq(floor(min(Yunan$Lon,na.rm=TRUE)), ceiling(max(Yunan$Lon,na.rm=TRUE)), by = 0.5)
# 2. allocate the data points into the bins
Yunan$latbin <- xbreaks[cut(Yunan$Lat, breaks = xbreaks, labels=F)]
Yunan$longbin <- ybreaks[cut(Yunan$Lon, breaks = ybreaks, labels=F)]
# 3. summarise the data for each bin (use the median)
datamat <- Yunan[, list(Value= median(Value)),
by = c("latbin", "longbin" )]
# 4. Merge the summarised data with all possible x, y coordinate combinations to get
# a value for every bin
datamat <- merge(setDT(expand.grid(latbin = xbreaks, longbin = ybreaks)), datamat,
by = c("latbin", "longbin"), all.x = TRUE, all.y = FALSE)
# 5. Fill up the empty bins 0 to smooth the contour plot
datamat[is.na(Value), ]$Value <- 0
# 6. Plot the contours
ggmap(Yunan_map,extent ="device") +
stat_contour(data = datamat, aes(x = longbin, y = latbin, z = Value,
fill = ..level.., alpha = ..level..), geom = 'polygon', binwidth = 30) +
scale_fill_gradient(name = "Value", low = "green", high = "red") +
guides(alpha = FALSE)
However, I encountered two problems
After executing the step 3 (summarise the data for each bin), I got this error message:
Error in [.data.frame(Yunan, , list(Value = median(Value)), by = c("latbin", :
unused argument (by = c("latbin", "longbin"))
I wish to change the colour scheme from gradient to discrete colours, something like this map:
Since the values in my dataset range from 17 to 21, I want to classify them in to different bins such as 17-17.5, 17.5-18, 18-18.5.... with corresponding colours.
Any suggestions that I can fix these problems. Thanks in advance.
I tried to get Berlin map from OpenStreetMap and then plot some locations on it (they should look like points, not markers).
I got the map based on the data I have cause I don't want a whole map, but area where includes all locations. However when I plot the points on the map, it delivers an error message:
non-numeric argument to binary operator
My code:
library(ggplot2)
library(OpenStreetMap)
# creating a sample data.frame with lat/lon points
lon <- read.csv("Data.csv", header = TRUE, colClasses=c("NULL", "NULL", NA))
lat <- read.csv("Data.csv", header = TRUE, colClasses=c("NULL", NA, "NULL"))
df <- as.data.frame(cbind(lon,lat))
# getting the map
mapberlin <- openmap(c(max(lat)+0.03,min(lon)-0.03), c(min(lat)-0.03,max(lon)+0.03), zoom = NULL,type = 'osm')
# plotting the map with some points on it and draw line between points
plot(mapberlin) + geom_point(data = df, aes(x = lon, y = lat, fill = 'red'), size = 2, shape = 21)
+geom_line(data = df, aes(x = lon, y = lat), color = 'blue') +
+guides(fill=FALSE, size=FALSE)
I'm working to plot the consolidated Z-value deviations (for a series of factors) from the national average for Pakistan on a fortified SPDF. For the purposes of this question, my data is irrelevant. I could provide it if necessary.
I am using ggplot to create my output where the command and result look something like this:
ggplot() + geom_polygon(data = plot.pakmod_sumZ, aes(x = long, y = lat, group = group, fill = SumZ.Cat), color = "black", size = 0.25, na.rm = TRUE) + scale_fill_manual(name = "Deviations from National Average", labels = c("-7", "-6", "-5", "-4", "-3", "-2", "-1", "Positive"), values = c("darkorange4","brown", "orangered1","tomato1","darkorange3","orange","yellow", "greenyellow"), na.value = "Grey", guide = guide_legend(reverse = TRUE)) + coord_map() + labs(x = NULL, y = NULL) + scale_x_discrete(breaks = NULL) + scale_y_discrete(breaks = NULL) + theme_minimal()
Deviations from National Average
I am trying to figure out now if it's possible to add diagonal lines in the polygons which have missing values and are coloured grey. Can this be done using ggplot?
This is an example I took from here. I opted to use the horizontal error bar geom. Mind that this isn't the only way of doing this.
library(ggplot2)
library(sp)
library(rgdal)
library(rgeos)
# create a local directory for the data
localDir <- "R_GIS_data"
if (!file.exists(localDir)) {
dir.create(localDir)
}
# download and unzip the data
url <- "ftp://www.ecy.wa.gov/gis_a/inlandWaters/wria.zip"
file <- paste(localDir, basename(url), sep='/')
if (!file.exists(file)) {
download.file(url, file)
unzip(file,exdir=localDir)
}
# create a layer name for the shapefiles (text before file extension)
layerName <- "WRIA_poly"
# read data into a SpatialPolygonsDataFrame object
dataProjected <- readOGR(dsn=localDir, layer=layerName)
dataProjected#data$id <- rownames(dataProjected#data)
# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected)
# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
dataProjected#data$id <- rownames(dataProjected#data)
watershedPoints <- fortify(dataProjected)
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
ggWatershed <- ggplot(data = watershedDF, aes(x=long, y=lat, group = group, fill = WRIA_NM)) +
geom_polygon() +
geom_path(color = "white") +
scale_fill_hue(l = 40) +
coord_equal() +
theme(legend.position = "none", title = element_blank())
# Adding coordinates to the data part of SPDF. `sd` is the variable of interest
# which is beign plotted here. Each line extends sd away from long coordinate
dataProjected#data$sd <- rnorm(nrow(xy), mean = 50000, sd = 10000)
xy <- coordinates(dataProjected)
dataProjected#data$long <- xy[, 1]
dataProjected#data$lat <- xy[, 2]
ggWatershed +
geom_errorbarh(data = dataProjected#data, aes(group = id, xmin = long - sd, xmax = long + sd))