Summarizing Latitude, Longitude, and Counts Data for ggplot Usage - r

I have been provided with some customer data in Latitude, Longitude, and Counts format. All the data I need to create a ggplot heatmap is present, but I do not know how to put it into the format ggplot requires.
I am trying to aggregate the data by total counts within 0.01 Lat and 0.01 Lon blocks (typical heatmap), and I instinctively thought "tapply". This creates a nice summary by block size, as desired, but the format is wrong. Furthermore, I would really like to have empty Lat or Lon block values be included as zeroes, even if there is nothing there... otherwise the heatmap ends up looking streaky and odd.
I have created a subset of my data for your reference in the code below:
# m is the data provided: one row per location with the number of customers
# counted there.
m <- data.frame(
  Lat = c(44.9591051, 44.984884, 44.984884, 44.9811399,
          44.9969096, 44.990894, 44.9797023, 44.983334),
  Lon = c(-93.3120017, -93.297668, -93.297668, -93.2993524,
          -93.2924484, -93.282462, -93.2738911, -93.26667),
  Count = c(69, 147, 137, 22, 68, 198, 35, 138)
)
# Total the counts per 0.01-degree block: rows are rounded longitudes,
# columns are rounded latitudes.
s <- as.data.frame(
  with(m, tapply(Count, list(round(Lon, 2), round(Lat, 2)), sum))
)
# tapply() leaves NA for Lon/Lat combinations with no rows; report those as 0.
s[is.na(s)] <- 0
# Data frame "s" has all the data, but not exactly in the format desired...
# First, it has a column for each latitude, instead of one column for Lon
# and one for Lat, and second, it needs to have 0 as the entry data for
# Lat / Lon pairs that have no other data. As it is, there are only zeroes
# when one of the other entries has a Lat or Lon that matches... if there
# are no entries for a particular Lat or Lon value, then nothing at all is
# reported.
# Target layout: one row per 0.01-degree Lat/Lon block, including the blocks
# whose count is zero.
desired.format <- data.frame(
  Lat = rep(c(44.96, 44.97, 44.98, 44.99, 45), each = 5),
  Lon = rep(c(-93.31, -93.3, -93.29, -93.28, -93.27), times = 5),
  Count = c(
    69, 0, 0, 0, 0,     # Lat 44.96
    0, 0, 0, 0, 0,      # Lat 44.97
    0, 306, 0, 0, 173,  # Lat 44.98
    0, 0, 0, 198, 0,    # Lat 44.99
    0, 0, 68, 0, 0      # Lat 45
  )
)
# Fetch a basemap for Minneapolis and shade each block in red, with the
# opacity driven by its count.
minneapolis <- get_map(location = "minneapolis, mn", zoom = 12)
ggmap(minneapolis) +
  geom_tile(
    data = desired.format,
    mapping = aes(x = Lon, y = Lat, alpha = Count),
    fill = "red"
  )

Here is a stab with geom_hex and stat_density2d. The idea of making bins by truncating coordinates makes me a bit uneasy.
What you have is count data, with lat/longs given, which means ideally you would need a weight parameter, but that is as far as I know not implemented with geom_hex. Instead, we hack it by repeating rows per the count variable, similar to the approach here.
## hack job to repeat records to full count
# Each row of m is repeated Count times so that unweighted density/hex stats
# see every counted customer as a separate observation.
m <- as.data.frame(m)
# seq_len() stays correct when m has no rows (1:nrow(m) would yield c(1, 0)).
m_long <- m[rep(seq_len(nrow(m)), times = m$Count), ]
## stat_density2d
# Filled density contours of the repeated records, outlined in black, with
# the records themselves drawn on top. Lat is on x and Lon on y here.
ggplot(data = m_long, mapping = aes(x = Lat, y = Lon)) +
  stat_density2d(
    mapping = aes(alpha = ..level.., fill = ..level..),
    geom = c("polygon", "contour"),
    bins = 10,
    size = 2
  ) +
  scale_fill_gradient(low = "blue", high = "red") +
  geom_density2d(colour = "black", bins = 10) +
  geom_point(data = m_long)
## geom_hex alternative
# Hexagonal bins of the repeated records; each hexagon is labelled with the
# number of records that fell into it.
bins <- 6
ggplot(data = m_long, mapping = aes(x = Lat, y = Lon)) +
  geom_hex(bins = bins) +
  coord_equal(ratio = 1) +
  scale_fill_gradient(low = "blue", high = "red") +
  geom_point(data = m_long, position = "jitter") +
  stat_binhex(
    mapping = aes(label = ..count.., size = ..count.. * .5),
    geom = "text",
    bins = bins,
    size = 3.5,
    colour = "white"
  )
These, respectively, produce the following:
And the binned version:
EDIT:
With basemap:
# Density contours over the basemap.
# NOTE(review): no `map` object is defined in these snippets; the basemap is
# presumably the `minneapolis` tiles fetched with get_map() -- confirm.
# Every layer supplies its own data and aesthetics (inherit.aes = FALSE), and
# longitude is mapped to x to match the basemap's axes.
ggmap(minneapolis) +
  stat_density2d(
    data = m_long,
    aes(x = Lon, y = Lat, alpha = ..level.., fill = ..level..),
    size = 2,
    bins = 10,
    geom = c("polygon", "contour"),
    inherit.aes = FALSE
  ) +
  scale_fill_gradient(low = "blue", high = "red") +
  geom_density2d(
    data = m_long, aes(x = Lon, y = Lat),
    colour = "black", bins = 10, inherit.aes = FALSE
  ) +
  geom_point(data = m_long, aes(x = Lon, y = Lat), inherit.aes = FALSE)
## and the hexbin map...
# NOTE(review): no `map` object is defined in these snippets; the basemap is
# presumably the `minneapolis` tiles fetched with get_map() -- confirm.
# `bins` is the bin count set for the geom_hex alternative above.
ggmap(minneapolis) +
  geom_hex(
    data = m_long, aes(x = Lon, y = Lat),
    bins = bins, alpha = .5, inherit.aes = FALSE
  ) +
  geom_point(
    data = m_long, aes(x = Lon, y = Lat),
    inherit.aes = FALSE, position = "jitter"
  ) +
  scale_fill_gradient(low = "blue", high = "red")

Related

Is there a polygon of world countries from -360 to 360 degrees in R?

I am kind of stuck on how to plot world maps that do not fit the polygons provided by map_data() in the ggplot2 package, which are "world" (from -180 to 180 degrees) and "world2" (from 0 to 360 degrees). For example, if I want to plot from 100°E to 20°E, no polygon provided by ggplot2 is useful. Here is the code of the example:
library(ggplot2)
# The two world polygon sets: "world" spans -180..180 degrees of longitude,
# "world2" spans 0..360.
map1 <- map_data("world")
map2 <- map_data("world2")

# Requested window (100..380 degrees) drawn with the -180..180 polygons
ggplot() +
  theme_bw() +
  geom_polygon(
    data = map1,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  coord_fixed(ratio = 1.4, xlim = c(100, 380))

# Same window drawn with the 0..360 polygons
ggplot() +
  theme_bw() +
  geom_polygon(
    data = map2,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  coord_fixed(ratio = 1.4, xlim = c(100, 380))
So, are there world maps polygons like those of the example but with a greater range?
This is scrappy, but why not? You could show a duplicated world so that every region can be shown without going over the edge.
library(dplyr)
# Three copies of the world, each shifted a further 360 degrees east, so any
# longitude window can be shown without running off the edge of the polygons.
map1 <- map_data("world")
map2 <- map1 %>% mutate(long = long + 360)
map3 <- map1 %>% mutate(long = long + 720)

ggplot() +
  theme_bw() +
  geom_polygon(
    data = map1,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  geom_polygon(
    data = map2,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  geom_polygon(
    data = map3,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  coord_fixed(ratio = 1.4, xlim = c(100, 480))
Rather than finding those polygons, you might just clip the longitude boundaries with a coord_quickmap() or coord_sf(). Note that coord_map() will incorrectly group polygon's points unless clipped before the ggplot() call.
library(tidyverse)
# Plot the map2 polygons built earlier and let the coordinate system restrict
# the visible longitude range.
ggplot() +
  theme_bw() +
  geom_polygon(
    data = map2,
    mapping = aes(x = long, y = lat, group = group),
    col = "gray"
  ) +
  coord_sf(xlim = c(20, 100))
# coord_quickmap(xlim = c(100,20)) is also an alternative to coord_sf()
"world" and "world2" are two versions of the world map that are implemented in the "maps" package. But that package actually allows you to define a world map with any boundaries, e.g. c(20,380) or c(100,460).
The simplest way is just to use the "wrap" option (which is described in the man page of maps::map). This option is passed to maps::map() (so look in the maps package for full help page)
# World polygons wrapped to the 20..380 degree longitude window; `wrap` is
# passed on to maps::map().
map3 <- map_data("world", wrap = c(20, 380))
This should give you a nice map for any meridian that you may choose. Note that in this case "wrap" must be a vector and that the left and right boundaries must be exactly 360 apart.

Using geom_text() to display text in geom_polygon() [duplicate]

I am trying to label my polygons by using ggplot in R. I found a topic here on stackoverflow that I think is very close to what I want except with points.
Label points in geom_point
I found some methods online. Now I first need to find the central location of each shape, then combine these locations with the names, and finally link the result to the labeling function geom_text().
ggplot centered names on a map
Since I have been trying for a long time now I decided to ask the question and hope that someone here can give me the final push to what I want. My plotting function:
# Polygon outlines keyed by "score"; the id column is converted to numeric
# and is later mapped to the fill gradient.
region_of_interest.fort <- fortify(region_of_interest, region = "score")
region_of_interest.fort$id <- as.numeric(region_of_interest.fort$id)
# (self-assignment; has no effect)
region_of_interest.fort$id <- region_of_interest.fort$id
# Second fortify keyed by "GM_NAAM", used only to collect the ids for labels.
region_of_interest.fort1 <- fortify(region_of_interest, region = "GM_NAAM")
region_of_interest.fort1$id <- as.character(region_of_interest.fort1$id)
# (self-assignment; has no effect)
region_of_interest.fort1$id <- region_of_interest.fort1$id
idList <- unique(region_of_interest.fort1$id)
# Label positions taken from coordinates() of the map object.
centroids.df <- as.data.frame(coordinates(region_of_interest))
names(centroids.df) <- c("Longitude", "Latitude")
# ids, a random shading value and the label positions side by side.
randomMap.df <- data.frame(id = idList, shading = runif(length(idList)), centroids.df)
# Plot that triggers the reported error: in the geom_text() call below,
# centroids.df is passed positionally rather than as data =.
ggplot(data = region_of_interest.fort, aes(x = long, y = lat, fill = id, group = group)) +
geom_polygon() +
geom_text(centroids.df, aes(label = id, x = Longitude, y = Latitude)) +
scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
coord_equal() +
theme() +
ggtitle("Title")
It gives me the error: ggplot2 doesn't know how to deal with data of class uneval
My data
region_of_interest$GM_NAAM
[1] Groningen Haren Ooststellingwerf Assen Aa en Hunze Borger- Odoorn
[7] Noordenveld Westerveld Tynaarlo Midden-Drenthe
415 Levels: 's-Gravenhage 's-Hertogenbosch Aa en Hunze Aalburg Aalsmeer Aalten ... Zwolle
region_of_interest$score
[1] 10 -2 -1 2 -1 -4 -4 -5 0 0
Try something like this?
Get a data frame of the centroids of your polygons from the
original map object.
In the data frame you are plotting, ensure there are columns for
the ID you want to label, and the longitude and latitude of those
centroids.
Use geom_text in ggplot to add the labels.
Based on this example I read a world map, extracting the ISO3 IDs to use as my polygon labels, and make a data frame of countries' ID, population, and longitude and latitude of centroids. I then plot the population data on a world map and add labels at the centroids.
library(rgdal) # used to read world map data
library(rgeos) # to fortify without needing gpclib
library(maptools)
library(ggplot2)
library(scales) # for formatting ggplot scales with commas

# Data from http://thematicmapping.org/downloads/world_borders.php.
# Direct link: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip
# Unpack and put the files in a dir 'data'
worldMap <- readOGR(dsn = "data", layer = "TM_WORLD_BORDERS_SIMPL-0.3")
# Change "data" to your path in the above!

# Fortifying a map makes the data frame ggplot uses to draw the map outlines.
# "region" or "id" identifies those polygons, and links them to your data.
# Look at head(worldMap@data) to see other choices for id.
# Your data frame needs a column with matching ids to set as the map_id
# aesthetic in ggplot.
worldMap.fort <- fortify(worldMap, region = "ISO3")

# The attribute table lives in the S4 slot `data`, reached with `@`.
idList <- worldMap@data$ISO3

# "coordinates" extracts centroids of the polygons, in the order listed at
# worldMap@data
centroids.df <- as.data.frame(coordinates(worldMap))
names(centroids.df) <- c("Longitude", "Latitude") # more sensible column names

# This shapefile contained population data, let's plot it.
popList <- worldMap@data$POP2005
pop.df <- data.frame(id = idList, population = popList, centroids.df)

ggplot(pop.df, aes(map_id = id)) + # "id" is col in your df, not in the map object
  geom_map(aes(fill = population), colour = "grey", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  scale_fill_gradient(high = "red", low = "white", guide = "colorbar", labels = comma) +
  geom_text(aes(label = id, x = Longitude, y = Latitude)) + # add labels at centroids
  coord_equal(xlim = c(-90, -30), ylim = c(-60, 20)) + # let's view South America
  labs(x = "Longitude", y = "Latitude", title = "World Population") +
  theme_bw()
Minor technical note: actually coordinates in the sp package doesn't quite find the centroid, but it should usually give a sensible location for a label. Use gCentroid in the rgeos package if you want to label at the true centroid in more complex situations like non-contiguous shapes.
The accepted answer here may work, but the actual question asked specifically notes that there is an error "ggplot2 doesn't know how to deal with data of class uneval."
The error occurs because centroids.df must be passed as a named argument ("data = centroids.df"); passed positionally, it is taken as the layer's mapping and the aes() call is taken as its data.
Currently:
# Failing version: centroids.df is handed to geom_text() positionally.
ggplot(data = region_of_interest.fort, aes(x = long, y = lat, fill = id, group = group)) +
geom_polygon() +
geom_text(centroids.df, aes(label = id, x = Longitude, y = Latitude)) +
scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
coord_equal() +
theme() +
ggtitle("Title")
Should be (note: "data=centroids.df"):
# Version with the label data passed by name (data = centroids.df).
# NOTE(review): centroids.df as built in the question only has Longitude and
# Latitude columns, so `label = id` and the inherited fill/group mappings may
# not resolve -- consider randomMap.df with inherit.aes = FALSE; verify.
ggplot(
  data = region_of_interest.fort,
  mapping = aes(x = long, y = lat, fill = id, group = group)
) +
  geom_polygon() +
  geom_text(
    data = centroids.df,
    mapping = aes(label = id, x = Longitude, y = Latitude)
  ) +
  scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
  coord_equal() +
  theme() +
  ggtitle("Title")
This issue was addressed here: How to deal with "data of class uneval" error from ggplot2?

Tips to make plot with 5 datasets clear

I'm really new to R and I'm trying to plot air pollution data (NOx) from 5 different locations (I have monthly averages for every location from 01-1996 to 12-2019). Each plot line should represent a different location.
I've created a ggplot but I find it really unclear. I would like to ask you about your tips to make that plot better to read (It will be no bigger than A4, because it will be included in my work and printed). I would also like to have more years on X axis (1996, 1997, 1998)
# Monthly NOx averages, one CSV per station.
ALIBA <- read_csv("ALIBA_Praha/NOx/all_sorted.csv")
BMISA <- read_csv("BMISA_Mikulov/NOx/all_sorted.csv")
CCBDA <- read_csv("CCBDA_CB/NOx/all_sorted.csv")
TKARA <- read_csv("TKARA_Karvina/NOx/all_sorted.csv")
UULKA <- read_csv("UULKA_UnL/NOx/all_sorted.csv")

# One line per station on shared axes, each in its own fixed colour; the x/y
# mapping is declared once and inherited by every layer.
ggplot(mapping = aes(x = START_TIME, y = VALUE)) +
  geom_line(data = ALIBA, color = "blue") +
  geom_line(data = BMISA, color = "red") +
  geom_line(data = CCBDA, color = "yellow") +
  geom_line(data = TKARA, color = "green") +
  geom_line(data = UULKA, color = "pink")
all csv files are in format:
START_TIME,VALUE
1996-01-01T00:00:00Z,61.3049451304964
1996-02-01T00:00:00Z,47.7234010245664
1996-03-01T00:00:00Z,33.083512309072
1996-04-01T00:00:00Z,47.771166691758
1996-05-01T00:00:00Z,24.7022422574005
1996-06-01T00:00:00Z,25.4495954480684
1996-07-01T00:00:00Z,23.301224242488
...
Thanks
First, I would paste all data sets together:
# Read each station and tag its rows with the station code so the tables can
# be stacked into one long data frame.
ALIBA <- read_csv("ALIBA_Praha/NOx/all_sorted.csv")
ALIBA$Location <- "ALIBA"
BMISA <- read_csv("BMISA_Mikulov/NOx/all_sorted.csv")
BMISA$Location <- "BMISA"
CCBDA <- read_csv("CCBDA_CB/NOx/all_sorted.csv")
CCBDA$Location <- "CCBDA"
TKARA <- read_csv("TKARA_Karvina/NOx/all_sorted.csv")
TKARA$Location <- "TKARA"
UULKA <- read_csv("UULKA_UnL/NOx/all_sorted.csv")
UULKA$Location <- "UULKA"

# All files share the START_TIME/VALUE layout, so they can be row-bound.
df <- rbind(ALIBA, BMISA, CCBDA, TKARA, UULKA)

# Mapping colour to Location gives one line per station plus a legend.
ggplot(data = df, aes(x = START_TIME, y = VALUE, color = Location)) +
  geom_line(size = 1) + # play with the stroke thickness
  scale_color_brewer(palette = "Set1") # many other palettes are available
How would you like to add more years? In the same graph (everything will be tiny) or in separate "windows" (= facets, better)?

Raster image on world map in ggplot

I want to fill certain values (pressure) continuously as gradient fill on a world map and I am writing the following code:
# Reshape the phi grid to long form: one row per lon/lat pair, value in x.
df <- data.frame(phi)
names(df) <- lat
df$lon <- lon
mdata <- melt(df, id = c("lon"))
names(mdata) <- c("lon", "lat", "x")
mdata <- transform(
  mdata,
  x = as.numeric(x),
  lon = as.numeric(lon),
  lat = as.numeric(as.character(lat))
)
wr <- map_data("world")
# Prepare a map of World
wrmap <- ggplot(data = wr, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = mdata,
    mapping = aes(x = lon, y = lat, colour = x),
    inherit.aes = FALSE,
    size = 3,
    shape = 4
  ) +
  scale_fill_gradient("Phi", limits = c(4500, 6000)) +
  theme_bw() +
  coord_equal()
wrmap
Unfortunately the points are coming out discrete.
Any ideas how to fix this?
I am not exactly sure what you want because you didn't give us any data, but I made some guesses and did this:
library(ggplot2)
library(maps)
library(reshape2)
# Generate some fake data
lat <- seq(-90, 90, by = 5)
lon <- seq(-180, 180, by = 10)
# Rows of phi index longitude and columns index latitude: this is the layout
# the reshaping step relies on (columns get named after lat, and a lon value
# is attached to each row). Values lie between 4500 and 6000.
phi <- 1500 * tcrossprod(cos(pi * lon / 180), sin(pi * lat / 180))^2 + 4500
# above thanks to @NBAtrends for turning my two ugly for loops into this elegant statement
# Long form of the grid: one row per lon/lat pair with the value in x.
df <- data.frame(phi)
names(df) <- lat
df$lon <- lon
mdata <- melt(df, id = c("lon"))
names(mdata) <- c("lon", "lat", "x")
mdata <- transform(
  mdata,
  x = as.numeric(x),
  lon = as.numeric(lon),
  lat = as.numeric(as.character(lat))
)
wr <- map_data("world")
# Prepare a map of World
wrmap <- ggplot(data = wr, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = mdata,
    mapping = aes(x = lon, y = lat, color = x),
    inherit.aes = FALSE,
    size = 3
  ) +
  scale_color_gradient("Phi", limits = c(4500, 6000)) +
  theme_bw() +
  coord_equal()
wrmap
Yielding this, which seems close to what you probably want:
This leads me to conclude that the problem is with your data. By comparing it to my fake data, I think you can probably figure out your problem.
Also I changed the "x" to a circle since you couldn't see its color very well.

Labeling center of map polygons in R ggplot

I am trying to label my polygons by using ggplot in R. I found a topic here on stackoverflow that I think is very close to what I want except with points.
Label points in geom_point
I found some methods online. Now I first need to find the central location of each shape, then combine these locations with the names, and finally link the result to the labeling function geom_text().
ggplot centered names on a map
Since I have been trying for a long time now I decided to ask the question and hope that someone here can give me the final push to what I want. My plotting function:
# Polygon outlines keyed by "score"; the id column is converted to numeric
# and is later mapped to the fill gradient.
region_of_interest.fort <- fortify(region_of_interest, region = "score")
region_of_interest.fort$id <- as.numeric(region_of_interest.fort$id)
# (self-assignment; has no effect)
region_of_interest.fort$id <- region_of_interest.fort$id
# Second fortify keyed by "GM_NAAM", used only to collect the ids for labels.
region_of_interest.fort1 <- fortify(region_of_interest, region = "GM_NAAM")
region_of_interest.fort1$id <- as.character(region_of_interest.fort1$id)
# (self-assignment; has no effect)
region_of_interest.fort1$id <- region_of_interest.fort1$id
idList <- unique(region_of_interest.fort1$id)
# Label positions taken from coordinates() of the map object.
centroids.df <- as.data.frame(coordinates(region_of_interest))
names(centroids.df) <- c("Longitude", "Latitude")
# ids, a random shading value and the label positions side by side.
randomMap.df <- data.frame(id = idList, shading = runif(length(idList)), centroids.df)
# Plot that triggers the reported error: in the geom_text() call below,
# centroids.df is passed positionally rather than as data =.
ggplot(data = region_of_interest.fort, aes(x = long, y = lat, fill = id, group = group)) +
geom_polygon() +
geom_text(centroids.df, aes(label = id, x = Longitude, y = Latitude)) +
scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
coord_equal() +
theme() +
ggtitle("Title")
It gives me the error: ggplot2 doesn't know how to deal with data of class uneval
My data
region_of_interest$GM_NAAM
[1] Groningen Haren Ooststellingwerf Assen Aa en Hunze Borger- Odoorn
[7] Noordenveld Westerveld Tynaarlo Midden-Drenthe
415 Levels: 's-Gravenhage 's-Hertogenbosch Aa en Hunze Aalburg Aalsmeer Aalten ... Zwolle
region_of_interest$score
[1] 10 -2 -1 2 -1 -4 -4 -5 0 0
Try something like this?
Get a data frame of the centroids of your polygons from the
original map object.
In the data frame you are plotting, ensure there are columns for
the ID you want to label, and the longitude and latitude of those
centroids.
Use geom_text in ggplot to add the labels.
Based on this example I read a world map, extracting the ISO3 IDs to use as my polygon labels, and make a data frame of countries' ID, population, and longitude and latitude of centroids. I then plot the population data on a world map and add labels at the centroids.
library(rgdal) # used to read world map data
library(rgeos) # to fortify without needing gpclib
library(maptools)
library(ggplot2)
library(scales) # for formatting ggplot scales with commas

# Data from http://thematicmapping.org/downloads/world_borders.php.
# Direct link: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip
# Unpack and put the files in a dir 'data'
worldMap <- readOGR(dsn = "data", layer = "TM_WORLD_BORDERS_SIMPL-0.3")
# Change "data" to your path in the above!

# Fortifying a map makes the data frame ggplot uses to draw the map outlines.
# "region" or "id" identifies those polygons, and links them to your data.
# Look at head(worldMap@data) to see other choices for id.
# Your data frame needs a column with matching ids to set as the map_id
# aesthetic in ggplot.
worldMap.fort <- fortify(worldMap, region = "ISO3")

# The attribute table lives in the S4 slot `data`, reached with `@`.
idList <- worldMap@data$ISO3

# "coordinates" extracts centroids of the polygons, in the order listed at
# worldMap@data
centroids.df <- as.data.frame(coordinates(worldMap))
names(centroids.df) <- c("Longitude", "Latitude") # more sensible column names

# This shapefile contained population data, let's plot it.
popList <- worldMap@data$POP2005
pop.df <- data.frame(id = idList, population = popList, centroids.df)

ggplot(pop.df, aes(map_id = id)) + # "id" is col in your df, not in the map object
  geom_map(aes(fill = population), colour = "grey", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  scale_fill_gradient(high = "red", low = "white", guide = "colorbar", labels = comma) +
  geom_text(aes(label = id, x = Longitude, y = Latitude)) + # add labels at centroids
  coord_equal(xlim = c(-90, -30), ylim = c(-60, 20)) + # let's view South America
  labs(x = "Longitude", y = "Latitude", title = "World Population") +
  theme_bw()
Minor technical note: actually coordinates in the sp package doesn't quite find the centroid, but it should usually give a sensible location for a label. Use gCentroid in the rgeos package if you want to label at the true centroid in more complex situations like non-contiguous shapes.
The accepted answer here may work, but the actual question asked specifically notes that there is an error "ggplot2 doesn't know how to deal with data of class uneval."
The error occurs because centroids.df must be passed as a named argument ("data = centroids.df"); passed positionally, it is taken as the layer's mapping and the aes() call is taken as its data.
Currently:
# Failing version: centroids.df is handed to geom_text() positionally.
ggplot(data = region_of_interest.fort, aes(x = long, y = lat, fill = id, group = group)) +
geom_polygon() +
geom_text(centroids.df, aes(label = id, x = Longitude, y = Latitude)) +
scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
coord_equal() +
theme() +
ggtitle("Title")
Should be (note: "data=centroids.df"):
# Version with the label data passed by name (data = centroids.df).
# NOTE(review): centroids.df as built in the question only has Longitude and
# Latitude columns, so `label = id` and the inherited fill/group mappings may
# not resolve -- consider randomMap.df with inherit.aes = FALSE; verify.
ggplot(
  data = region_of_interest.fort,
  mapping = aes(x = long, y = lat, fill = id, group = group)
) +
  geom_polygon() +
  geom_text(
    data = centroids.df,
    mapping = aes(label = id, x = Longitude, y = Latitude)
  ) +
  scale_fill_gradient(high = "green", low = "red", guide = "colorbar") +
  coord_equal() +
  theme() +
  ggtitle("Title")
This issue was addressed here: How to deal with "data of class uneval" error from ggplot2?

Resources