how to plot networks over a map with the least overlap - r

I have some authors with their city or country of affiliation. I would like to know if it is possible to plot the coauthors' networks (figure 1), on the map, having the coordinates of the countries. Please consider multiple authors from the same country. [EDIT: Several networks could be generated as in the example and should not show avoidable overlaps]. This is intended for dozens of authors. A zooming option is desirable. Bounty promise +100 for future better answer.
refs5 <- read.table(text="
row bibtype year volume number pages title journal author
Bennett_1995 article 1995 76 <NA> 113--176 angiosperms. \"Annals of Botany\" \"Bennett Md, Leitch Ij\"
Bennett_1997 article 1997 80 2 169--196 estimates. \"Annals of Botany\" \"Bennett MD, Leitch IJ\"
Bennett_1998 article 1998 82 SUPPL.A 121--134 weeds. \"Annals of Botany\" \"Bennett MD, Leitch IJ, Hanson L\"
Bennett_2000 article 2000 82 SUPPL.A 121--134 weeds. \"Annals of Botany\" \"Bennett MD, Someone IJ\"
Leitch_2001 article 2001 83 SUPPL.A 121--134 weeds. \"Annals of Botany\" \"Leitch IJ, Someone IJ\"
New_2002 article 2002 84 SUPPL.A 121--134 weeds. \"Annals of Botany\" \"New IJ, Else IJ\"" , header=TRUE,stringsAsFactors=FALSE)
rownames(refs5) <- refs5[,1]
refs5<-refs5[,2:9]
citations <- as.BibEntry(refs5)
authorsl <- lapply(citations, function(x) as.character(toupper(x$author)))
unique.authorsl<-unique(unlist(authorsl))
coauth.table <- matrix(nrow=length(unique.authorsl),
ncol = length(unique.authorsl),
dimnames = list(unique.authorsl, unique.authorsl), 0)
for(i in 1:length(citations)){
paper.auth <- unlist(authorsl[[i]])
coauth.table[paper.auth,paper.auth] <- coauth.table[paper.auth,paper.auth] + 1
}
coauth.table <- coauth.table[rowSums(coauth.table)>0, colSums(coauth.table)>0]
diag(coauth.table) <- 0
coauthors<-coauth.table
bip = network(coauthors,
matrix.type = "adjacency",
ignore.eval = FALSE,
names.eval = "weights")
authorcountry <- read.table(text="
author country
1 \"LEITCH IJ\" Argentina
2 \"HANSON L\" USA
3 \"BENNETT MD\" Brazil
4 \"SOMEONE IJ\" Brazil
5 \"NEW IJ\" Brazil
6 \"ELSE IJ\" Brazil",header=TRUE,fill=TRUE,stringsAsFactors=FALSE)
matched<- authorcountry$country[match(unique.authorsl, authorcountry$author)]
bip %v% "Country" = matched
colorsmanual<-c("red","darkgray","gainsboro")
names(colorsmanual) <- unique(matched)
gdata<- ggnet2(bip, color = "Country", palette = colorsmanual, legend.position = "right",label = TRUE,
alpha = 0.9, label.size = 3, edge.size="weights",
size="degree", size.legend="Degree Centrality") + theme(legend.box = "horizontal")
gdata
In other words, adding the names of authors, lines and bubbles to the map. Note, several authors maybe from the same city, or country and should not overlap.
Figure 1 Network
EDIT: The current JanLauGe answer overlaps two non-related networks. authors "ELSE" and "NEW" need to be apart from others as in figure 1.

Are you looking for a solution using exactly the packages you used, or would you be happy to use suite of other packages? Below is my approach, in which I extract the graph properties from the network object and plot them on a map using the ggplot2 and map package.
First I recreate the example data you gave.
library(tidyverse)
library(sna)
library(maps)
library(ggrepel)
set.seed(1)
coauthors <- matrix(
c(0,3,1,1,3,0,1,0,1,1,0,0,1,0,0,0),
nrow = 4, ncol = 4,
dimnames = list(c('BENNETT MD', 'LEITCH IJ', 'HANSON L', 'SOMEONE ELSE'),
c('BENNETT MD', 'LEITCH IJ', 'HANSON L', 'SOMEONE ELSE')))
coords <- data_frame(
country = c('Argentina', 'Brazil', 'USA'),
coord_lon = c(-63.61667, -51.92528, -95.71289),
coord_lat = c(-38.41610, -14.23500, 37.09024))
authorcountry <- data_frame(
author = c('LEITCH IJ', 'HANSON L', 'BENNETT MD', 'SOMEONE ELSE'),
country = c('Argentina', 'USA', 'Brazil', 'Brazil'))
Now I generate the graph object using the snp function network
# Generate network
bip <- network(coauthors,
matrix.type = "adjacency",
ignore.eval = FALSE,
names.eval = "weights")
# Graph with ggnet2 for centrality
gdata <- ggnet2(bip, color = "Country", legend.position = "right",label = TRUE,
alpha = 0.9, label.size = 3, edge.size="weights",
size="degree", size.legend="Degree Centrality") + theme(legend.box = "horizontal")
From the network object we can extract the values of each edge, and from the ggnet2 object we can get degree of centrality for nodes as below:
# Combine data
authors <-
# Get author numbers
data_frame(
id = seq(1, nrow(coauthors)),
author = sapply(bip$val, function(x) x$vertex.names)) %>%
left_join(
authorcountry,
by = 'author') %>%
left_join(
coords,
by = 'country') %>%
# Jittering points to avoid overlap between two authors
mutate(
coord_lon = jitter(coord_lon, factor = 1),
coord_lat = jitter(coord_lat, factor = 1))
# Get edges from network
networkdata <- sapply(bip$mel, function(x)
c('id_inl' = x$inl, 'id_outl' = x$outl, 'weight' = x$atl$weights)) %>%
t %>% as_data_frame
dt <- networkdata %>%
left_join(authors, by = c('id_inl' = 'id')) %>%
left_join(authors, by = c('id_outl' = 'id'), suffix = c('.from', '.to')) %>%
left_join(gdata$data %>% select(label, size), by = c('author.from' = 'label')) %>%
mutate(edge_id = seq(1, nrow(.)),
from_author = author.from,
from_coord_lon = coord_lon.from,
from_coord_lat = coord_lat.from,
from_country = country.from,
from_size = size,
to_author = author.to,
to_coord_lon = coord_lon.to,
to_coord_lat = coord_lat.to,
to_country = country.to) %>%
select(edge_id, starts_with('from'), starts_with('to'), weight)
Should look like this now:
dt
# A tibble: 8 × 11
edge_id from_author from_coord_lon from_coord_lat from_country from_size to_author to_coord_lon
<int> <chr> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
1 1 BENNETT MD -51.12756 -16.992729 Brazil 6 LEITCH IJ -65.02949
2 2 BENNETT MD -51.12756 -16.992729 Brazil 6 HANSON L -96.37907
3 3 BENNETT MD -51.12756 -16.992729 Brazil 6 SOMEONE ELSE -52.54160
4 4 LEITCH IJ -65.02949 -35.214117 Argentina 4 BENNETT MD -51.12756
5 5 LEITCH IJ -65.02949 -35.214117 Argentina 4 HANSON L -96.37907
6 6 HANSON L -96.37907 36.252312 USA 4 BENNETT MD -51.12756
7 7 HANSON L -96.37907 36.252312 USA 4 LEITCH IJ -65.02949
8 8 SOMEONE ELSE -52.54160 -9.551913 Brazil 2 BENNETT MD -51.12756
# ... with 3 more variables: to_coord_lat <dbl>, to_country <chr>, weight <dbl>
Now moving on to plotting this data on a map:
world_map <- map_data('world')
myMap <- ggplot() +
# Plot map
geom_map(data = world_map, map = world_map, aes(map_id = region),
color = 'gray85',
fill = 'gray93') +
xlim(c(-120, -20)) + ylim(c(-50, 50)) +
# Plot edges
geom_segment(data = dt,
alpha = 0.5,
color = "dodgerblue1",
aes(x = from_coord_lon, y = from_coord_lat,
xend = to_coord_lon, yend = to_coord_lat,
size = weight)) +
scale_size(range = c(1,3)) +
# Plot nodes
geom_point(data = dt,
aes(x = from_coord_lon,
y = from_coord_lat,
size = from_size,
colour = from_country)) +
# Plot names
geom_text_repel(data = dt %>%
select(from_author,
from_coord_lon,
from_coord_lat) %>%
unique,
colour = 'dodgerblue1',
aes(x = from_coord_lon, y = from_coord_lat, label = from_author)) +
coord_equal() +
theme_bw()
Obviously you can change the colour and design in the usual way with ggplot2 grammar. Notice that you could also use geom_curve and the arrow aesthetic to get a plot similar to the one in the uber post linked in the comments above.

As an effort to avoid the overlapping of the 2 networks, I came to this modification of the x and y coordenates of the ggplot, which by default does not overlap the networks, see figure 1 in the question.
# get centroid positions for countries
# add coordenates to authorcountry table
# download and unzip
# https://worldmap.harvard.edu/data/geonode:country_centroids_az8
setwd("~/country_centroids_az8")
library(rgdal)
cent <- readOGR('.', "country_centroids_az8", stringsAsFactors = F)
countrycentdf<-cent#data[,c("name","Longitude","Latitude")]
countrycentdf$name[which(countrycentdf$name=="United States")]<-"USA"
colnames(countrycentdf)[names(countrycentdf)=="name"]<-"country"
authorcountry$Longitude<-countrycentdf$Longitude[match(authorcountry$country,countrycentdf$country)]
authorcountry$Latitude <-countrycentdf$Latitude [match(authorcountry$country,countrycentdf$country)]
# original coordenates of plot and its transformation
ggnetbuild<-ggplot_build(gdata)
allcoord<-ggnetbuild$data[[3]][,c("x","y","label")]
allcoord$Latitude<-authorcountry$Latitude [match(allcoord$label,authorcountry$author)]
allcoord$Longitude<-authorcountry$Longitude [match(allcoord$label,authorcountry$author)]
allcoord$country<-authorcountry$country [match(allcoord$label,authorcountry$author)]
# increase with factor the distance among dots
factor<-7
allcoord$coord_lat<-allcoord$y*factor+allcoord$Latitude
allcoord$coord_lon<-allcoord$x*factor+allcoord$Longitude
allcoord$author<-allcoord$label
# plot as in answer of JanLauGe, without jitter
library(tidyverse)
library(ggrepel)
authors <-
# Get author numbers
data_frame(
id = seq(1, nrow(coauthors)),
author = sapply(bip$val, function(x) x$vertex.names)) %>%
left_join(
allcoord,
by = 'author')
# Continue as in answer of JanLauGe
networkdata <- ##
dt <- ##
world_map <- map_data('world')
myMap <- ##
myMap

Related

Order legend in a R Plotly Bubblemap following factor order

I'm working on a Bubble map where I generated two columns, one for a color id (column Color) and one for a text refering to the id (column Class). This is a classification of my individuals (Color always belongs to Class).
Class is a factor following a certain order that I made with :
COME1039$Class <- as.factor(COME1039$Class, levels = c('moins de 100 000 F.CFP',
'entre 100 000 et 5 millions F.CFP',
'entre 5 millions et 1 milliard F.CFP',
'entre 1 milliard et 20 milliards F.CFP',
'plus de 20 milliards F.CFP'))
This is my code
g <- list(
scope = 'world',
visible = F,
showland = TRUE,
landcolor = toRGB("#EAECEE"),
showcountries = T,
countrycolor = toRGB("#D6DBDF"),
showocean = T,
oceancolor = toRGB("#808B96")
)
COM.g1 <- plot_geo(data = COME1039,
sizes = c(1, 700))
COM.g1 <- COM.g1 %>% add_markers(
x = ~LONGITUDE,
y = ~LATITUDE,
name = ~Class,
size = ~`Poids Imports`,
color = ~Color,
colors=c(ispfPalette[c(1,2,3,7,6)]),
text=sprintf("<b>%s</b> <br>Poids imports: %s tonnes<br>Valeur imports: %s millions de F.CFP",
COME1039$NomISO,
formatC(COME1039$`Poids Imports`/1000,
small.interval = ",",
digits = 1,
big.mark = " ",
decimal.mark = ",",
format = "f"),
formatC(COME1039$`Valeur Imports`/1000000,
small.interval = ",",
digits = 1,
big.mark = " ",
decimal.mark = ",",
format = "f")),
hovertemplate = "%{text}<extra></extra>"
)
COM.g1 <- COM.g1%>% layout(geo=g)
COM.g1 <- COM.g1%>% layout(dragmode=F)
COM.g1 <- COM.g1 %>% layout(showlegend=T)
COM.g1 <- COM.g1 %>% layout(legend = list(title=list(text='Valeurs des importations<br>'),
orientation = "h",
itemsizing='constant',
x=0,
y=0)) %>% hide_colorbar()
COM.g1
Unfortunately my data are too big to be added here, but this is the output I get :
As you can see, the order of the legend is not the one of the factor levels. How to get it ? If data are mandatory to help you to give me a hint, I will try to limit their size.
Many thanks !
Plotly is going to alphabetize your legend and you have to 'make' it listen. The order of the traces in your plot is the order in which the items appear in your legend. So if you rearrange the traces in the object, you'll rearrange the legend.
I don't have your data, so I used some data from rnaturalearth.
First I created a plot, using plot_geo. Then I used plotly_build() to make sure I had the trace order in the Plotly object. I used lapply to investigate the current order of the traces. Then I created a new order, rearranged the traces, and plotted it again.
The initial plot and build.
library(tidyverse)
library(plotly)
library(rnaturalearth)
canada <- ne_states(country = "Canada", returnclass = "SF")
x = plot_geo(canada, sizes = c(1, 700)) %>%
add_markers(x = ~longitude, y = ~latitude,
name = ~name, color = ~name)
x <- plotly_build(x) # capture all elements of the object
Now for the investigation; this is more so you can see how this all comes together.
# what order are they in?
y = vector()
invisible(
lapply(1:length(x$x$data),
function(i) {
z <- x$x$data[[i]]$name
message(i, " ", z)
})
)
# 1 Alberta
# 2 British Columbia
# 3 Manitoba
# 4 New Brunswick
# 5 Newfoundland and Labrador
# 6 Northwest Territories
# 7 Nova Scotia
# 8 Nunavut
# 9 Ontario
# 10 Prince Edward Island
# 11 Québec
# 12 Saskatchewan
# 13 Yukon
In your question, you show that you made the legend element a factor. That's what I've done as well with this data.
can2 = canada %>%
mutate(name = ordered(name,
levels = c("Manitoba", "New Brunswick",
"Newfoundland and Labrador",
"Northwest Territories",
"Alberta", "British Columbia",
"Nova Scotia", "Nunavut",
"Ontario", "Prince Edward Island",
"Québec", "Saskatchewan", "Yukon")))
I used the data to reorder the traces in my Plotly object. This creates a vector. It starts with the levels and their row number or order (1:13). Then I alphabetized the data by the levels (so it matches the current order in the Plotly object).
The output of this set of function calls is a vector of numbers (i.e., 5, 6, 1, etc.). Since I have 13 names, I have 1:13. You could always make it dynamic, as well 1:length(levels(can2$name).
# capture order
df1 = data.frame(who = levels(can2$name), ord = 1:13) %>%
arrange(who) %>% select(ord) %>% unlist()
Now all that's left is to rearrange the object traces and visualize it.
x$x$data = x$x$data[order(c(df1))] # reorder the traces
x # visualize
Originally:
With reordered traces:

GGPLOT2 Line plots from an R list containing vectors and single numeric values

I have an R list that contains 2500 lists in it. Each of 2500 lists contain 1 vector and 2 values. For the sake of reproducibility, I subset a tiny version of the data so it looks something like this:
head(models, 1)
>$model_1
>$model_1$m
> [1] 0.01335775 0.01336487 0.01336805 0.01338025 0.01340532 0.01343117 0.01346120 0.01349530 0.01353788 > 0.01357654 0.01360668
>$model_1$Cab
>[1] 59.6774
>$model_1$LAI
>[1] 4.01739
>$model_2
>$model_2$m
> [1] 0.02367338 0.02360433 0.02352800 0.02346125 0.02339469 0.02333403 0.02325861 0.02317945 0.02310961 >0.02303802 0.02295710
>$model_2$Cab
>[1] 59.6774
>$model_2$LAI
>[1] 0.5523946
Now, I want to make a line plot (using ggplot2) whose x axis is values from 400 to 410 and y axis is the vector in each lists (models$model_1$m, models$model_2$m and so on.) Therefore, there will be a lot of lines in the plot. I also want to color (continuous coloring) each line with their respective models$model_2$Cab values and have a continuous legend showing each models$model_2$Cab value and its color.
For reproducibility (Please note that this is greatly simplified version of the original data):
> dput(head(models, 10))
list(model_1 = list(m = c(0.0133577497667816, 0.0133648693063468,
0.0133680481888036, 0.01338024983382, 0.0134053218864944, 0.0134311717034271,
0.0134612003419723, 0.0134953017438241, 0.0135378825635721, 0.0135765418166368,
0.0136066826886183), Cab = 59.6773970406502, LAI = 4.01739045299768),
model_2 = list(m = c(0.023673375903171, 0.0236043348551818,
0.0235280045196734, 0.0234612496831449, 0.0233946873132861,
0.0233340349230324, 0.0232586128971129, 0.0231794538902946,
0.0231096074536893, 0.023038021285693, 0.0229570982021948
), Cab = 59.6773970406502, LAI = 0.552394618641403), model_3 = list(
m = c(0.0138277418755234, 0.0138310132688916, 0.0138301891768216,
0.0138383905159343, 0.0138587906203227, 0.0138802253169266,
0.0139048786261519, 0.0139332011615252, 0.0139700189737812,
0.0140030367215791, 0.0140275202380309), Cab = 59.6773970406502,
LAI = 3.01987725977579), model_4 = list(m = c(0.017483089696901,
0.0174591709902523, 0.017429967081058, 0.0174099884420304,
0.0173976896061841, 0.0173882607103241, 0.0173752969257632,
0.0173632160871019, 0.0173599236031355, 0.0173536114293099,
0.0173384748063733), Cab = 59.6773970406502, LAI = 1.37503600459533),
model_5 = list(m = c(0.0182499047037402, 0.0182203724940146,
0.0181853063358603, 0.0181595102703982, 0.0181404648083386,
0.0181246681180869, 0.0181039409709977, 0.01808352264341,
0.0180719579429791, 0.018057532687598, 0.0180342187796566
), Cab = 59.6773970406502, LAI = 1.22529135635182), model_6 = list(
m = c(0.0158200567917405, 0.0158083674745268, 0.0157919331298277,
0.0157846269346119, 0.0157870246965916, 0.0157914665730281,
0.0157954117645301, 0.0158014906653224, 0.0158162176575737,
0.0158275775312257, 0.0158302513933357), Cab = 59.6773970406502,
LAI = 1.81571552453658), model_7 = list(m = c(0.0133628950691214,
0.0133699680411211, 0.0133730986417069, 0.0133852517083498,
0.0134102666346747, 0.0134360623898904, 0.0134660252680654,
0.0135000559061319, 0.0135425658393117, 0.013581155812944,
0.013611227528355), Cab = 59.6773970406502, LAI = 3.99643688124574),
model_8 = list(m = c(0.0183501671255408, 0.0183199017377111,
0.0182840698901064, 0.0182575139774255, 0.0182375872739662,
0.0182209588085648, 0.0181992175650369, 0.0181777101462036,
0.0181650648958527, 0.0181495798700031, 0.0181251977995322
), Cab = 59.6773970406502, LAI = 1.20735517669905), model_9 = list(
m = c(0.0143687162679524, 0.0143678440890305, 0.0143626995592654,
0.0143666036037224, 0.0143820089259476, 0.0143987279254991,
0.0144176359711743, 0.0144397860850458, 0.0144704682720878,
0.0144974726755733, 0.0145159061770205), Cab = 59.6773970406502,
LAI = 2.51320168699674), model_10 = list(m = c(0.0138736072820698,
0.0138765215672426, 0.0138753253418108, 0.0138831561248062,
0.0139031250366076, 0.0139241525443688, 0.0139483098566198,
0.0139760994306543, 0.0140123870383231, 0.0140448852992375,
0.0140688465774421), Cab = 59.6773970406502, LAI = 2.96397596054064))
What I want to achieve is something like this (but with a better-looking ggplot2):
This could be achieved like so:
Convert your list of lists to a list of dataframes.
Add a variable with your x-axis variable to each df
Bind the list of data frames by row
Plot, where I make use of scale_colour_gradientn(colors = rainbow(20)) to mimic your rainbow color scale.
library(dplyr)
library(ggplot2)
models <- lapply(models, as.data.frame) %>%
lapply(function(x) { x$x <- 400:410; x}) %>%
bind_rows(.id = "id")
ggplot(models, aes(x = x, y = m, color = LAI, group = id)) +
geom_line() +
scale_x_continuous(breaks = scales::pretty_breaks()) +
scale_colour_gradientn(colors = rainbow(20))

Repeating values on map:leaflet map

I am trying to make a map using leaflet. I uploaded a shapefile of 216 districts. I also have a dataset with information from 7 out the 216 districts.
I was hoping for the map to have districts that don't have values or 0% in grey saying not enough information". While having districts with actual values (>0%) showing up as colour following their corresponding bins.
When I tried to do upload my dataset and shapfile, I got a map with coloured districts everywhere. Based on my dataset, there are suppose to be 4 districts (>0%) in colour. But this is not what I see on my map.
How do I make sure that only the districts in my dataset light up where it is suppose to light up, without repeating all over the map? (while maintaining the backdrop of all the other districts in grey)
So far this is the code I used to achieved the map:
districtsg <-readOGR("sample/copyfile/Districts/Map_of_Districts_.shp")
districtsg <- sp::spTransform(districtsg, CRS("+proj=longlat +datum=WGS84"))
wpnew <- wpnew [order(match(wpnew$District,districtsg$NAME)),]
bins <- c(0.1,2.0,5.0,10.0,25.0,40.0,50.0)
pal<- colorBin("YlOrRd",domain=wpnew$per.content,bins=bins)
m<- leaflet() %>%
setView(lng = -1.0232,lat=7.9465,zoom = 6) %>%
addProviderTiles(providers$Stamen.Toner) %>%
addPolygons(data =districtsg,
weight = 1,
smoothFactor = 0.5,
color = "White",
fillOpacity = 0.8,
fillColor= ~pal(wpnew$per.content),
highlight = highlightOptions(
weight = 5,
color = "#666666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE
))
m
labels <- paste( "<p>","District:", districtsg$NAME,"</p>",
"<p>", "% of reports that are content:",round(wpnew$per.content,digits = 3),"</p>",
"<p>", "Total reports labelled as a content:",round(wpnew$totalcontent,digits = 3),"</p>",
"<p>", "Total reports from this district:",round(wpnew$totalreports,digits = 3),"</p>",sep = "" )
m<- leaflet() %>%
setView(lng = -1.0232,lat=7.9465,zoom = 6) %>%
addProviderTiles(providers$Stamen.Toner) %>%
addPolygons(data =districtsg,
weight = 1,
smoothFactor = 0.5,
color = "White",
fillOpacity = 0.8,
fillColor= pal(wpnew$per.content),
label = lapply(labels,HTML)) %>%
addLegend(pal=pal,
values = wpnew$per.content,
opacity = 0.7,
"topright")
m
districts totalreports totalcontent per.content
1 Jomoro 4 2 50.00000
2 Ellembelle 2 1 50.00000
3 Tarkwa Nsuaem 1 0 0.00000
4 Bia West 1 0 0.00000
5 Bodi 2 0 0.00000
6 Accra Metropolis 3 1 33.33333
7 Adenta 3 1 33.33333
shapefile can be downloaded here:
https://data.gov.gh/dataset/shapefiles-all-districts-ghana-2012-216-districts
I handling the joining of shape file and the data file differently and I create my base map using tmap. but perhaps this will be helpful.
library(rgdal)
library(tmap)
library(leaflet)
####Access shape map
elem <- readOGR(dsn = "Data/P3Map", layer = "Boundary___ES")
####Preschool Status for Elementary Schools####
schoolAdresses_PK_2021 <- read_excel("Data/P3Map/schoolAdresses_PK_2021.xlsx") %>%
mutate(PreK= factor(PreK)) %>%
clean_names("lower_camel") %>%
mutate(programType = factor(programType))
##### Merge shape with PreK info######
map <- merge(elem, by.x = "ES_Name", schoolAdresses_PK_2021, by.y = "esName" )
#### Render Map####
MyColors <- c('#e2a331', '#d3d3d3','#3688c8') #yellow, #grey, #blue
PKMap <- tm_shape(map)+
tm_fill(col="preK",
title = " ",
palette = MyColors)+
tm_shape(JeffcoMap)+
tm_borders(col = "white")+
tm_layout("Jeffco PreK Expansion 2019-2020", legend.text.size = -0.5)+
tm_text(text = "ES_ShortNa", size = 0.5, group = "Site Names")
PKMap %>% tmap_leaflet() %>%
setView(lng = -105.10033, lat = 39.6, zoom =9) %>% #lat and long of my district
addProviderTiles('Esri.WorldGrayCanvas', group='Grayscale Map') %>%
addProviderTiles('OpenStreetMap', group='Street Map') %>%
addMarkers(lng = -105.155927, #add marker for PK detached from elementary
lat = 39.746347,
icon = YellowIcon,
label = "Litz",
popup = "<b>Program type:</b><br>Ext. Day",
popupOptions = labelOptions(direction = "bottom",
textsize = "8px"),
group = "Stand alone PreK")
from here you can add leaflet layers
It's tough without your data, but I hope this is helpful. In my case, I am mapping 95 elementary schools in one district.
Your 'districtsg' = My 'elem'
Your 'wpnew' = My 'map'
Example map
Here is my attempt while using your datasets:
library(rgdal)
library(tmap)
library(leaflet)
library(sp)
districtsg <-readOGR('data/Map_of_Districts_216.shp')
wpnew <- read.csv('data/dataFromStack.csv')
map <- sp::merge(x = districtsg, y = wpnew, by = "NAME")
MyColors <- c('#e2a331', '#d3d3d3','#3688c8') #yellow, #grey, #blue
tm_shape(map)+
tm_fill(col="totalcontent",
title = " ",
palette = MyColors)+
tm_shape(districtsg)+
tm_borders(col = "white")
Here is the result that I get.. It does take a moment to render in the R Studio Viewer

First painful attempt to do a Spatial map

I am struggling to get my first map to work. I have read every document I could find but I am not able to pull it all together to view my data on a map.
This is what I have done so far.
1. I created a very basic data table with 3 observations and 5 variables as a very simple starting point.
str(Datawithlatlongnotvector)
'data.frame': 3 obs. of 5 variables:
$ Client: Factor w/ 3 levels "Jan","Piet","Susan": 2 1 3
$ Sales : int 100 1000 15000
$ Lat : num 26.2 33.9 23.9
$ Lon : num 28 18.4 29.4
$ Area : Factor w/ 3 levels "Gauteng","Limpopo",..: 1 3 2
(the Area is the provinces of South Africa and also is as per the SHP file that I downloaded, see below)
I downloaded a map of South Africa and placed all 3 files (.dbf, shp and shx) files in the same directory - previous error but I found the answer from another user's question. http://www.mapmakerdata.co.uk.s3-website-eu-west-1.amazonaws.com/library/stacks/Africa/South%20Africa/index.htm and selected Simple base map.
I created a map as follows :
SAMap <- readOGR(dsn = ".", layer = "SOU-level_1")
and I can plot the map of the country showing the provinces with plot(SAMap)
I can also plot the data
plot(datawithlatlong)
I saw the instructions how to make a SpatialPointsData frame and I did that :
coordinates(Datawithlatlong) = ~Lat + Lon
I do not know how to pull it all together and do the following :
Show the data (100,1000 and 15000) on the map with different colours i.e. between 1 and 500 is one colour, between 501 and 10 000 is one colour and above 10 000 is one colour.
Maybe trying ggplot2 with some function like:
map = ggplot(df, aes(long, lat, fill = Sales_cat)) + scale_fill_brewer(type = "seq", palette = "Oranges", name = "Sales") + geom_polygon()
With scale_fill_brewer you can represent scales in terms of colours on the map. You should create a factor variable that represents categories according to the range of sales ("Sales_cat"). In any case, the shape file must be transformed into a data.frame.
Try this for 'SAMap' as the country shapefile and 'datawithlatlong' as your data convereted to SpatialPointDataFrame:
library(maptools)
library(classInt)
library(RColorBrewer)
# Prepare colour pallete
plotclr <- brewer.pal(3,"PuRd")
class<-classIntervals(datawithlatlong#data$sales, n=3, style="fixed", fixedBreaks=c(0, 500,1000,10000)) # you can adjust the intervals here
colcode <- findColours(class, plotclr)
# Plot country map
plot(SAMap,xlim=c(16, 38.0), ylim=c(-46,-23))# plot your polygon shapefile with appropriate xlim and ylim (extent)
# Plot dataframe convereted to SPDF (in your step 5)
plot(datawithlatlong, col=colcode, add=T,pch=19)
# Creating the legend
legend(16.2, -42, legend=names(attr(colcode, "table")), fill=attr(colcode, "palette"), cex=0.6, bty="n") # adjust the x and y for fixing appropriate location for the legend
I generated a bigger dataset because I think with only 3 points it hard to see how things are working.
library(rgdal)
library(tmap)
library(ggmap)
library(randomNames)
#I downloaded the shapefile with the administrative area polygons
map <- readOGR(dsn = ".", layer = "SOU")
#the coordinate system is not part of the loaded object hence I added this information
proj4string(map) <- CRS("+init=epsg:4326")
# Some sample data with random client names and random region
ADM2 <- sample(map#data$ADM2, replace = TRUE, 50)
name <- randomNames(50)
sales <- sample(0:5000, 50)
clientData <- data.frame(id = 1:50, name, region = as.character(ADM2), sales,
stringsAsFactors = FALSE)
#In order to add the geoinformation for each client I used the awesome
#function `ggmap::geocode` which takes a character string as input an
#provides the lon and lat for the region, city ...
geoinfo <- geocode(clientData$region, messaging = FALSE)
# Use this information to build a Point layer
clientData_point <- SpatialPointsDataFrame(geoinfo, data = clientData)
proj4string(clientData_point) <- CRS("+init=epsg:4326")
Now the part I hope that answers the question:
# Adding all sales which occured in one region
# If there are 3 clients in one region, the sales of the three are
# summed up and returned in a new layer
sales_map <- aggregate(x = clientData_point[ ,4], by = map, FUN = sum)
# Building a map using the `tmap` package`
tm_shape(sales_map) + tm_polygons(col = "sales")
Edit:
Here is a ggplot2 solution because it seems you want to stick with it.
First, for ggplot you have to transform your SpatialPolygonDataFrame to an ordinary data.frame. Fortunately, broom::tidy() will do the job automatically.
Second, your Lat values are missing a -. I added it.
Third, I renamed your objects for less typing.
point_layer<- structure(list(Client = structure(c(2L, 1L, 3L),
.Label = c("Jan", "Piet", "Susan"),
class = "factor"),
Sales = c(100, 1000, 15000 ),
Lat = c(-26.2041, -33.9249, -23.8962),
Lon = c(28.0473, 18.4241, 29.4486),
Area = structure(c(1L, 3L, 2L),
.Label = c("Gauteng", "Limpopo", "Western Cape"),
class = "factor"),
Sale_range = structure(c(1L, 2L, 4L),
.Label = c("(1,500]", "(500,2e+03]", "(2e+03,5e+03]", "(5e+03,5e+04]"),
class = "factor")),
.Names = c("Client", "Sales", "Lat", "Lon", "Area", "Sale_range"),
row.names = c(NA, -3L), class = "data.frame")
point_layer$Sale_range <- cut(point_layer$Sales, c(1,500.0,2000.0,5000.0,50000.0 ))
library(broom)
library(ggplot2)
ggplot_map <- tidy(map)
ggplot() + geom_polygon(ggplot_map, mapping = aes(x = long, y = lat, group = group),
fill = "grey65", color = "black") +
geom_point(point_layer, mapping = aes(x = Lon, y = Lat, col = Sale_range)) +
scale_colour_brewer(type = "seq", palette = "Oranges", direction = 1)

Mapping nearest neighbours of a long-lat data set using ggmap, geom_point and a loop

My ultimate goal is to connect all nearest neighbours of a set of buildings (based on Euclidean distance) on a ggmap using geom_path from the ggplot2 package. I need help with a loop that will allow me to plot all neighbours as easily as possible
I have created a distance matrix (called 'kmnew') in kilometres between 3 types of building in Beijing: B (x2), D (x2) and L (x1):
B B D D L
B NA 6.599014 5.758531 6.285787 3.770175
B NA NA 7.141096 3.873296 5.092667
D NA NA NA 3.690725 2.563017
D NA NA NA NA 2.832083
L NA NA NA NA NA
I try to discern the nearest neighbours of each building by row by declaring a matrix and using a loop to ascertain the nearest neighbour building:
nn <- matrix(NA,nrow=5,ncol=1)
for (i in 1:nrow(kmnew)){
nn[i,] <- which.min(kmnew[i,])
}
This returns the following error (not sure why):
Error in nn[i, ] <- which.min(kmnew[i, ]) : replacement has length zero
but seems to return the correct answer to nn:
[,1]
[1,] 5
[2,] 4
[3,] 5
[4,] 5
[5,] NA
I append this to an original dataframe called newbjdata:
colbj <- cbind(newbjdata,nn)
that returns
Name Store sqft long lat nn
1 B 1 1200 116.4579 39.93921 5
2 B 2 750 116.3811 39.93312 4
3 D 1 550 116.4417 39.88882 5
4 D 2 600 116.4022 39.90222 5
5 L 1 1000 116.4333 39.91100 NA
I then retrieve my map via ggmap:
bjgmap <- get_map(location = c(lon = 116.407395,lat = 39.904211),
zoom = 13, scale = "auto",
maptype = "roadmap",
messaging = FALSE, urlonly = FALSE,
filename = "ggmaptemp", crop = TRUE,
color = "bw",
source = "google", api_key)
My ultimate goal is to map the nearest neighbours together in a plot using geom_path from the ggplot package.
For example, the nn of the 1st building of type B (row 1) is the 1 building of type L (row 5). Obviously I can draw this line by subsetting the said 2 rows of the dataframe thus:
ggmap(bjgmap) +
geom_point(data = colbj, aes(x = long,y = lat, fill = factor(Name)),
size =10, pch = 21, col = "white") +
geom_path(data = subset(colbj[c(1,5),]), aes(x = long,y = lat),col = "black")
However, I need a solution that works like a loop, and I can't figure out how one might achieve this, as I need to reference the nn column and refer that back to the long lat data n times. I can well believe that I am not using the most efficient method, so am open to alternatives. Any help much appreciated.
Here is my attempt. I used gcIntermediate() from the geosphere package to set up lines. First, I needed to rearrange your data. When you use gcIntermediate(), you need departure and arrival long/lat. That is you need four columns. In order to arrange your data in this way, I used the dplyr package. mutate_each(colbj, funs(.[nn]), vars = long:lat) works for you to pick up desired arrival long/lat. . is for 'long' and 'lat'. [nn] is the vector index for the variables. Then, I employed gcIntermediate(). This creates SpatialLines. You need to make the object a SpatialLinesDataFrame. Then, you need to convert the output to "normal" data.frame. This step is essential so that ggplot can read your data. fortify() is doing the job.
library(ggmap)
library(geosphere)
library(dplyr)
library(ggplot2)
### Arrange the data: set up departure and arrival long/lat
mutate_each(colbj, funs(.[nn]), vars = long:lat) %>%
rename(arr_long = vars1, arr_lat = vars2) %>%
filter(complete.cases(nn)) -> mydf
### Get line information
rts <- gcIntermediate(mydf[,c("long", "lat")],
mydf[,c("arr_long", "arr_lat")],
50,
breakAtDateLine = FALSE,
addStartEnd = TRUE,
sp = TRUE)
### Convert the routes to a data frame for ggplot use
rts <- as(rts, "SpatialLinesDataFrame")
rts.df <- fortify(rts)
### Get a map (borrowing the OP's code)
bjgmap <- get_map(location = c(lon = 116.407395,lat = 39.904211),
zoom = 13, scale = "auto",
maptype = "roadmap",
messaging = FALSE, urlonly = FALSE,
filename = "ggmaptemp", crop = TRUE,
color = "bw",
source = "google", api_key)
# Draw the map
ggmap(bjgmap) +
geom_point(data = colbj,aes(x = long, y = lat, fill = factor(Name)),
size = 10,pch = 21, col = "white") +
geom_path(data = rts.df, aes(x = long, y = lat, group = group),
col = "black")
EDIT
If you want to do all data manipulation in one sequence, the following is one way to go. foo is identical to rts.df above.
mutate_each(colbj, funs(.[nn]), vars = long:lat) %>%
rename(arr_long = vars1, arr_lat = vars2) %>%
filter(complete.cases(nn)) %>%
do(fortify(as(gcIntermediate(.[,c("long", "lat")],
.[,c("arr_long", "arr_lat")],
50,
breakAtDateLine = FALSE,
addStartEnd = TRUE,
sp = TRUE), "SpatialLinesDataFrame"))) -> foo
identical(rts.df, foo)
#[1] TRUE
DATA
colbj <- structure(list(Name = structure(c(1L, 1L, 2L, 2L, 3L), .Label = c("B",
"D", "L"), class = "factor"), Store = c(1L, 2L, 1L, 2L, 1L),
sqft = c(1200L, 750L, 550L, 600L, 1000L), long = c(116.4579,
116.3811, 116.4417, 116.4022, 116.4333), lat = c(39.93921,
39.93312, 39.88882, 39.90222, 39.911), nn = c(5L, 4L, 5L,
5L, NA)), .Names = c("Name", "Store", "sqft", "long", "lat",
"nn"), class = "data.frame", row.names = c("1", "2", "3", "4",
"5"))

Resources