Related
I have spatial coordinates in a data frame where each row (Longitude, Latitude) corresponds to the occurrence of an event I am following. I tried to map these data but instead of using points, I want to create a grid with cells of a resolution of 5 nautical miles (~ 0.083333) and count the number of occurrences of the event is each cell and plot it.
This is the code I came to write with the help of some resources. But it doesn't look the way I expected it to be. Can you figure out what's I'm doing wrong? I attached the raw positions and the resulting map I get.
Here is the link to the data.
re_pi = read.csv(file = "~/Desktop/Events.csv")
gridx <- seq(from=-19,to=-10,by=0.083333)
gridy <- seq(from=20,to=29,by=0.083333)
xcell <- unlist(lapply(re_pi$LON,function(x) min(which(gridx>x))))
ycell <- unlist(lapply(re_pi$LAT,function(y) min(which(gridy>y))))
re_pi$cell <- (length(gridx) - 1) * ycell + xcell
rr = re_pi %>%
group_by(cell)%>%
summarise(Lat = mean(LAT),Lon = mean(LON),Freq = length(cell))
my_theme <- theme_bw() + theme(panel.ontop=TRUE, panel.background=element_blank())
my_cols <- scale_color_distiller(palette='Spectral')
my_fill <- scale_fill_distiller(palette='Spectral')
ggplot(rr, aes(y=Lat, x=Lon, fill=Effort)) + geom_tile(width=1.2, height=1.2) +
borders('world', xlim=range(rr$Lon), ylim=range(rr$Lat), colour='black') + my_theme + my_fill +
coord_quickmap(xlim=range(rr$Lon), ylim=range(rr$Lat))
Nice dataset, assume these are fishing vessel VMS data. Here may be one way to achieve your objective, heavily reliant on the tidyverse and by-passing raster and shapes.
library(tidyverse)
library(mapdata) # higher resolution maps
# poor man's gridding function
grade <- function (x, dx) {
if (dx > 1)
warning("Not tested for grids larger than one")
brks <- seq(floor(min(x)), ceiling(max(x)), dx)
ints <- findInterval(x, brks, all.inside = TRUE)
x <- (brks[ints] + brks[ints + 1])/2
return(x)
}
d <-
read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
janitor::clean_names() %>%
# make a grid 0.01 x 0.01 longitude / latitude
mutate(lon = grade(lon, 0.01),
lat = grade(lat, 0.01)) %>%
group_by(lon, lat) %>%
count() %>%
# not much happening south of 21 and north of 26
filter(between(lat, 21, 26.25))
d %>%
ggplot() +
theme_bw() +
geom_tile(aes(lon, lat, fill = n)) +
scale_fill_viridis_c(option = "B", direction = -1) +
# only data within the data-bounds
borders(database = "worldHires",
xlim = range(d$lon), ylim = range(d$lat),
fill = "grey") +
labs(x = NULL, y = NULL, fill = "Effort") +
# limit plot
coord_quickmap(xlim = range(d$lon), ylim = range(d$lat)) +
# legends within plot
theme(legend.position = c(0.77, 0.26))
Here is my attempt using the sf package. First I imported your data and converted it to an sf object. Then, I created another sf object which includes the grids. I used the raster package and the sf package in order to create the grids. Once I had the two sf object, I counted how many data points exist in each grid and added the results as a new column in foo. Finally, I drew a graphic.
library(tidyverse)
library(sf)
library(raster)
library(viridis)
# Import the data and convert it to an sf object
mydata <- read_csv("https://raw.githubusercontent.com/abenmhamed/data/main/Events.csv") %>%
st_as_sf(coords = c("LON", "LAT"),
crs = 4326, agr = "constant")
# Create an sf object for the grid
gridx <- seq(from = -19,to = -10, by = 0.083333)
gridy <- seq(from = 20,to = 29, by = 0.083333)
foo <- raster(xmn = -19, xmx = -10,
ymn = 20, ymx = 29,
nrows = length(gridx),
ncols = length(gridy)) %>%
rasterToPolygons() %>%
st_as_sf(crs = 4326) %>%
mutate(group = 1:(length(gridx)*length(gridy))) %>%
st_cast("MULTIPOLYGON")
# Now count how many data points exist in each grid
mutate(foo,
count = lengths(st_intersects(x = foo, y = mydata))) -> foo
# Draw a graphic
ggplot() +
geom_sf(data = foo, aes(fill = count)) +
scale_fill_viridis(option = "D") -> g
I've been producing animated maps showing the progression of COVID case data. In the interest of producing a minimal example I have skinnied the code down to the below, which only produces one frame. In practice I also read a number of csv files. I've tried to eliminate that in this example, but there is still one with county population data. I have posted it at https://pastebin.com/jCD9tP0X
library(urbnmapr) # For map
library(ggplot2) # For map
library(dplyr) # For summarizing
library(tidyr) # For reshaping
library(stringr) # For padding leading zeros
library(ggrepel)
library(ggmap)
library(usmap)
library(gganimate)
library(magrittr)
library(gifski)
library(scales)
#first run setup tasks
#these can be commented out once the data frames are in place
###################begin first run only################################
#define census regions
NE_region <- c("ME","NH","VT","MA", "CT", "RI", "NY", "PA", "NJ")
ne_region_bases <-c("Hanscom AFB", "Rome, NY")
# Get COVID cases, available from:
url <- "https://static.usafacts.org/public/data/covid-19/covid_confirmed_usafacts.csv"
COV <- read.csv(url, stringsAsFactors = FALSE)
#sometimes there are encoding issues with the first column name
names(COV)[1] <- "countyFIPS"
Covid <- pivot_longer(COV, cols=starts_with("X"),
values_to="cases",
names_to=c("X","date_infected"),
names_sep="X") %>%
mutate(infected = as.Date(date_infected, format="%m.%d.%Y"),
countyFIPS = str_pad(as.character(countyFIPS), 5, pad="0"))
# Obtain map data for counties (to link with covid data) and states (for showing borders)
states_sf <- get_urbn_map(map = "states", sf = TRUE)
counties_sf <- get_urbn_map(map = "counties", sf = TRUE)
# Merge county map with total cases of cov
#use this line to produce animated maps
#pop_counties_cov <- inner_join(counties_sf, Covid, by=c("county_fips"="countyFIPS"))
#use this one for a single map of the latest data
pop_counties_cov <- inner_join(counties_sf, group_by(Covid, countyFIPS) %>%
summarise(cases=sum(cases)), by=c("county_fips"="countyFIPS"))
#read the county population data
#csv at https://pastebin.com/jCD9tP0X
counties_pop <- read.csv("countyPopulations.csv", header=TRUE, stringsAsFactors = FALSE)
#pad the single digit state FIPS states
counties_pop <- counties_pop %>% mutate(CountyFIPS=str_pad(as.character(CountyFIPS),5,pad="0"))
#merge the population and covid data by FIPS
pop_counties_cov$population <- counties_pop$Population[match(pop_counties_cov$county_fips,counties_pop$CountyFIPS)]
#calculate the infection rate
pop_counties_cov <- pop_counties_cov %>% mutate(infRate = (cases/population)*100)
#counties with 0 infections don't appear in the usafacts data, so didn't get a population
#set them to 0
pop_counties_cov$population[is.na(pop_counties_cov$population)] <- 0
pop_counties_cov$infRate[is.na(pop_counties_cov$infRate)] <- 0
plotDate="April14"
basepath = "your/output file/path/here/"
naColor = "white"
lowColor = "green"
midColor = "maroon"
highColor = "red"
baseFill = "dodgerblue4"
baseColor = "firebrick"
baseShape = 23
###################end first run only################################
###################Northeast Map################################
#filter out states
ne_pop_counties_cov <- pop_counties_cov %>% filter(state_abbv %in% NE_region)
ne_states_sf <- states_sf %>% filter(state_abbv %in% NE_region)
ne_counties_sf <- counties_sf %>% filter(state_abbv %in% NE_region)
#filter out bases
neBases <- structure(list(Base = c("Hanscom AFB", "Rome, NY"), longitude = c(-71.2743123,
-75.4557303),
latitude = c(42.4579955, 43.2128473),
personnel = c(2906L,822L),
longitude.1 = c(2296805.44531269, 1951897.82199569),
latitude.1 = c(128586.352781279, 99159.9145180969)),
row.names = c(NA, -2L), class = "data.frame")
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,size=personnel),
shape = baseShape,
color = baseColor,
fill = baseFill) +
theme_bw() +
labs(size='AFMC \nMil + Civ') +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
###################End Northeast Map################################
If you run this you should get a single frame...when I do the whole animation, here is the final frame
The diamonds represent the locations of air force bases we're interested in within the region, and they are sized by how many personnel are there.
What I have been asked to do is to make the diamonds the same size, but "color code" the fill based on the number of personnel. I don't think this is a good idea, but I'm not the boss.
I'm not sure how to have two gradient fills on a single plot?
If you want to place a second filling gradient, you can have the use of new_scale_fill function from ggnewscale package:
library(ggnewscale)
p <- ne_pop_counties_cov %>%
ggplot() +
geom_sf(mapping = aes(fill = infRate, geometry=geometry), color = NA) +
geom_sf(data = ne_states_sf, fill = NA, color = "black", size = 0.25) +
coord_sf(datum = NA) +
scale_fill_gradient(name = "% Pop \nInfected", trans = "log",low=lowColor, high=highColor,
breaks=c(0, max(ne_pop_counties_cov$infRate)),
na.value = naColor) +
new_scale_fill()+
geom_point(data=neBases,
aes(x=longitude.1, y=latitude.1,fill=personnel),
shape = baseShape,
color = "black",
#fill = baseFill,
size = 5) +
scale_fill_gradient(name = "AFMC \nMil + Civ",
low = "blue", high = "magenta",
breaks = c(1,max(neBases$personnel)))+
theme_bw() +
theme(legend.position="bottom",
panel.border = element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
print(p)
Does it answer your question ?
I saw yesterday this beautiful map of McDonalds restaurants in USA. I wanted to replicate it for France (I found some data that can be downloaded here).
I have no problem plotting the dots:
library(readxl)
library(ggplot2)
library(raster)
#open data
mac_do_FR <- read_excel("./mcdo_france.xlsx")
mac_do_FR_df <- as.data.frame(mac_do_FR)
#get a map of France
mapaFR <- getData("GADM", country="France", level=0)
#plot dots on the map
ggplot() +
geom_polygon(data = mapaFR, aes(x = long, y = lat, group = group),
fill = "transparent", size = 0.1, color="black") +
geom_point(data = mac_do_FR_df, aes(x = lon, y = lat),
colour = "orange", size = 1)
I tried several methods (Thiessen polygons, heat maps, buffers), but the results I get are very poor. I can't figure out how the shaded polygons were plotted on the American map. Any pointers?
Here's my result, but it did take some manual data wrangling.
Step 1: Get geospatial data.
library(sp)
# generate a map of France, along with a fortified dataframe version for ease of
# referencing lat / long ranges
mapaFR <- raster::getData("GADM", country="France", level=0)
map.FR <- fortify(mapaFR)
# generate a spatial point version of the same map, defining your own grid size
# (a smaller size yields a higher resolution heatmap in the final product, but will
# take longer to calculate)
grid.size = 0.01
points.FR <- expand.grid(
x = seq(min(map.FR$long), max(map.FR$long), by = grid.size),
y = seq(min(map.FR$lat), max(map.FR$lat), by = grid.size)
)
points.FR <- SpatialPoints(coords = points.FR, proj4string = mapaFR#proj4string)
Step 2: Generate a voronoi diagram based on store locations, & obtain the corresponding polygons as a SpatialPolygonsDataFrame object.
library(deldir)
library(dplyr)
voronoi.tiles <- deldir(mac_do_FR_df$lon, mac_do_FR_df$lat,
rw = c(min(map.FR$long), max(map.FR$long),
min(map.FR$lat), max(map.FR$lat)))
voronoi.tiles <- tile.list(voronoi.tiles)
voronoi.center <- lapply(voronoi.tiles,
function(l) data.frame(x.center = l$pt[1],
y.center = l$pt[2],
ptNum = l$ptNum)) %>%
data.table::rbindlist()
voronoi.polygons <- lapply(voronoi.tiles,
function(l) Polygon(coords = matrix(c(l$x, l$y),
ncol = 2),
hole = FALSE) %>%
list() %>%
Polygons(ID = l$ptNum)) %>%
SpatialPolygons(proj4string = mapaFR#proj4string) %>%
SpatialPolygonsDataFrame(data = voronoi.center,
match.ID = "ptNum")
rm(voronoi.tiles, voronoi.center)
Step 3. Check which voronoi polygon each point on the map overlaps with, & calculate its distance to the corresponding nearest store.
which.voronoi <- over(points.FR, voronoi.polygons)
points.FR <- cbind(as.data.frame(points.FR), which.voronoi)
rm(which.voronoi)
points.FR <- points.FR %>%
rowwise() %>%
mutate(dist = geosphere::distm(x = c(x, y), y = c(x.center, y.center))) %>%
ungroup() %>%
mutate(dist = ifelse(is.na(dist), max(dist, na.rm = TRUE), dist)) %>%
mutate(dist = dist / 1000) # convert from m to km for easier reading
Step 4. Plot, adjusting the fill gradient parameters as needed. I felt the result of a square root transformation looks quite good for emphasizing distances close to a store, while a log transformation is rather too exaggerated, but your mileage may vary.
ggplot() +
geom_raster(data = points.FR %>%
mutate(dist = pmin(dist, 100)),
aes(x = x, y = y, fill = dist)) +
# optional. shows outline of France for reference
geom_polygon(data = map.FR,
aes(x = long, y = lat, group = group),
fill = NA, colour = "white") +
# define colour range, mid point, & transformation (if desired) for fill
scale_fill_gradient2(low = "yellow", mid = "red", high = "black",
midpoint = 4, trans = "sqrt") +
labs(x = "longitude",
y = "latitude",
fill = "Distance in km") +
coord_quickmap()
I'm working to plot the consolidated Z-value deviations (for a series of factors) from the national average for Pakistan on a fortified SPDF. For the purposes of this question, my data is irrelevant. I could provide it if necessary.
I am using ggplot to create my output where the command and result look something like this:
ggplot() + geom_polygon(data = plot.pakmod_sumZ, aes(x = long, y = lat, group = group, fill = SumZ.Cat), color = "black", size = 0.25, na.rm = TRUE) + scale_fill_manual(name = "Deviations from National Average", labels = c("-7", "-6", "-5", "-4", "-3", "-2", "-1", "Positive"), values = c("darkorange4","brown", "orangered1","tomato1","darkorange3","orange","yellow", "greenyellow"), na.value = "Grey", guide = guide_legend(reverse = TRUE)) + coord_map() + labs(x = NULL, y = NULL) + scale_x_discrete(breaks = NULL) + scale_y_discrete(breaks = NULL) + theme_minimal()
Deviations from National Average
I am trying to figure out now if it's possible to add diagonal lines in the polygons which have missing values and are coloured grey. Can this be done using ggplot?
This is an example I took from here. I opted to use the horizontal error bar geom. Mind that this isn't the only way of doing this.
library(ggplot2)
library(sp)
library(rgdal)
library(rgeos)
# create a local directory for the data
localDir <- "R_GIS_data"
if (!file.exists(localDir)) {
dir.create(localDir)
}
# download and unzip the data
url <- "ftp://www.ecy.wa.gov/gis_a/inlandWaters/wria.zip"
file <- paste(localDir, basename(url), sep='/')
if (!file.exists(file)) {
download.file(url, file)
unzip(file,exdir=localDir)
}
# create a layer name for the shapefiles (text before file extension)
layerName <- "WRIA_poly"
# read data into a SpatialPolygonsDataFrame object
dataProjected <- readOGR(dsn=localDir, layer=layerName)
dataProjected#data$id <- rownames(dataProjected#data)
# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected)
# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
dataProjected#data$id <- rownames(dataProjected#data)
watershedPoints <- fortify(dataProjected)
watershedDF <- merge(watershedPoints, dataProjected#data, by = "id")
ggWatershed <- ggplot(data = watershedDF, aes(x=long, y=lat, group = group, fill = WRIA_NM)) +
geom_polygon() +
geom_path(color = "white") +
scale_fill_hue(l = 40) +
coord_equal() +
theme(legend.position = "none", title = element_blank())
# Adding coordinates to the data part of SPDF. `sd` is the variable of interest
# which is beign plotted here. Each line extends sd away from long coordinate
dataProjected#data$sd <- rnorm(nrow(xy), mean = 50000, sd = 10000)
xy <- coordinates(dataProjected)
dataProjected#data$long <- xy[, 1]
dataProjected#data$lat <- xy[, 2]
ggWatershed +
geom_errorbarh(data = dataProjected#data, aes(group = id, xmin = long - sd, xmax = long + sd))
I have written a function to load spatial data, extract data from an input dataset and merge this dataset with the spatial data. Then my function returns a map on which my cases get plotted.
My function works fine if I return my plot as the following:
(with fill = totalCases)
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = totalCases), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
However, my goal is to pass a parameter providing the value (= variable) for the fill parameter as you can see in my below code. But this throws the following error:
Error in eval(expr, envir, enclos) : object 'variable' not found
Here is my code:
plotMonths <- function(data, variable, date) {
# Reloading district polygons
sl_adm2_months <- readOGR("C:/Users/woba/Documents/Ordina/TFS-Projects/Ordina - Mail Analytics/Johnson/Wouter/03. GeoData map - R/Sierra Leone adm2", "SLE_adm2", verbose = TRUE, stringsAsFactors = FALSE)
sl_adm2_months_DF <- fortify(sl_adm2_months, region = "NAME_2")
# Getting the correct District names
colnames(sl_adm2_months_DF)[7] <- "District"
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Rural", "Western Area Rural", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- ifelse(sl_adm2_months_DF$District == "Western Urban", "Western Area Urban", as.character(sl_adm2_months_DF$District))
sl_adm2_months_DF$District <- as.factor(sl_adm2_months_DF$District)
#Extracting district names for plotting
sl_adm2_months_names_DF <- data.frame(long = coordinates(sl_adm2_months[, 1]), lat = coordinates(sl_adm2_months[, 2]))
sl_adm2_months_names_DF[, "ID_2"] <- sl_adm2_months#data[, "ID_2"]
sl_adm2_months_names_DF[, "NAME_2"] <- sl_adm2_months#data[, "NAME_2"]
# Subset May data
sl_Month <- data[data$Country == "Sierra Leone" & data$Date <= as.Date(date), ]
sl_Month <- droplevels(sl_Month)
sl_Month[is.na(sl_Month)] <- 0
confirmed <- ddply(sl_Month, .(Localite), function(x){max(x$cmlConfirmed.cases, na.rm = T)})
cases <- ddply(sl_Month, .(Localite), function(x){max(x$cmlCases, na.rm = T)})
deaths <- ddply(sl_Month, .(Localite), function(x){max(x$cmlDeaths, na.rm = T)})
sl_Month <- merge(cases, deaths, by = "Localite")
sl_Month <- merge(sl_Month, confirmed, by = "Localite")
sl_Month <- droplevels(sl_Month)
sl_Month <- droplevels(sl_Month)
colnames(sl_Month)<- c("District", "totalCases", "totalDeaths", "totalConfirmed")
sl_Month <- sl_Month[-which(sl_Month$District == "National"),]
# Merging Month data with District polygons
sl_adm2_Month <- merge(sl_adm2_months_DF, sl_Month, by = "District", all.x = TRUE)
sl_adm2_Month$totalCases <- as.numeric(sl_adm2_Month$totalCases)
sl_adm2_Month$totalDeaths <- as.numeric(sl_adm2_Month$totalDeaths)
sl_adm2_Month$totalConfirmed <- as.numeric(sl_adm2_Month$totalConfirmed)
#NA to 0 for values missing for districts
sl_adm2_Month[is.na(sl_adm2_Month)] <- 0
#Sorting
sl_adm2_Month <- sl_adm2_Month[order(sl_adm2_Month$District, sl_adm2_Month$order), ]
# Prints & Views
print(head(sl_Month))
View(sl_Month)
View(sl_adm2_Month)
Sys.setlocale("LC_TIME", "English")
# Plotting Cases
return ({
ggplot() +
geom_polygon(data = sl_adm2_Month, aes(x = long, y = lat, group = group,
fill = variable), colour = "white") +
geom_text(data = sl_adm2_months_names_DF, aes(label = NAME_2, x = long.1, y = lat.2, group = NAME_2), size = 3) +
# labs(title = paste("Ebola", str_sub(as.character(variable), 6, -1), "cases by district in Sierra Leone - until", format(as.Date(date), "%B %Y"))) +
xlab("") +
ylab("") +
theme_gray() +
theme(legend.position = "bottom")
})
}
# Plotting the months - variable = second input and must be IN c(totalDeaths, totalCases, totalConfirmed)
plotMonths(final_dataset, "totalCases", "2014-05-31")
I've read some similar questions on the forum but wasn't able to resolve my issue.
Any help on how to fix this is very welcome!
Using 'aes_string' instead of 'aes' solved my issue.
aes_string(x = "long", y = "lat", group = "group", fill = variable)
Explanation on the differences between aes & aes_string for the ggplot2 package can be found here:
What is the difference between aes and aes_string (ggplot2) in R
All credit goes to Axeman & Benjamin - their answers solved my issue!