See unmatched countries for joinCountryData2Map in rworldmap? - r

I'm using the joinCountryData2Map function in rworldmap to match my data to the countries in the world map.
I get this result:
230 codes from your data successfully matched countries in the map
11 codes from your data failed to match with a country code in the map
11 codes from the map weren't represented in your data
I cannot figure out how to view those two lists of 11 countries. I am guessing that those 11 countries have issues with their ISO2 codes that I need to correct, but am not sure which ones to check without being able to view those two lists.
I'm guessing there's a solution along the lines of just View(SomeObject$Countries) but I haven't been able to find anything that works.

Set joinCountryData2Map(...,verbose=TRUE) to print the names of the countries that failed to match in the console.
From the FAQ: "You can see that a summary of how many countries are successfully joined is output to the console. You can specify verbose=TRUE to get a full list of countries"
library(rworldmap)
data(countryExData)
# Set Angola to fail
countryExData[countryExData$ISO3V10 == "AGO", "ISO3V10"] <- "AGO_FAIL"
# Attempt to join
# With verbose=TRUE, failed joins (ie Angola) are printed in the console
sPDF <- joinCountryData2Map(
countryExData[,c("ISO3V10", "Country")],
joinCode = "ISO3",
nameJoinColumn = "ISO3V10",
verbose = TRUE)
# > 148 codes from your data successfully matched countries in the map
# > 1 codes from your data failed to match with a country code in the map
# > failedCodes failedCountries
# > [1,] "AGO_FAIL" "Angola"
# > 95 codes from the map weren't represented in your data
But what if you want to get the information on failed joins programmatically? I may have missed something, but I don't see an option for that (i.e., str(sPDF) or function arguments). However, looking at the internals of joinCountryData2Map(), the object failedCountries contains the info you want, so it should be easy enough to include it in the returned object.
Here's how you could modify joinCountryData2Map() to return a list with two elements: the first element is the default object, and the second element is failedCountries.
# Modify the function to return the failed joins in the environment
joinCountryData2Map_wfails <- function(
dF, joinCode = "ISO3", nameJoinColumn = "ISO3V10",
nameCountryColumn = "Country", suggestForFailedCodes = FALSE,
mapResolution = "coarse", projection = NA, verbose = FALSE) {
# Retain successful join as first element and failed join as second element
ll <- list() # MODIFIED
mapWithData <- getMap(resolution = mapResolution)
if (!is.na(projection))
warning("the projection argument has been deprecated, returning Lat Lon, use spTransform from package rgdal as shown in help details or the FAQ")
listJoinCodesNew <- c("ISO_A2", "ISO_A3", "FIPS_10_",
"ADMIN", "ISO_N3")
listJoinCodesOld <- c("ISO2", "ISO3", "FIPS",
"NAME", "UN")
listJoinCodes <- c(listJoinCodesOld, listJoinCodesNew)
if (joinCode %in% listJoinCodes == FALSE) {
stop("your joinCode (", joinCode, ") in joinCountryData2Map() is not one of those supported. Options are :",
paste(listJoinCodes, ""), "\n")
return(FALSE)
}
joinCodeOld <- joinCode
if (joinCode %in% listJoinCodesOld) {
joinCode <- listJoinCodesNew[match(joinCode, listJoinCodesOld)]
}
if (is.na(match(nameJoinColumn, names(dF)))) {
stop("your chosen nameJoinColumn :'", nameJoinColumn,
"' seems not to exist in your data, columns = ",
paste(names(dF), ""))
return(FALSE)
}
dF[[joinCode]] <- as.character(dF[[nameJoinColumn]])
dF[[joinCode]] <- gsub("[[:space:]]*$", "", dF[[joinCode]])
if (joinCode == "ADMIN") {
dF$ISO3 <- NA
for (i in 1:nrow(dF)) dF$ISO3[i] = rwmGetISO3(dF[[joinCode]][i])
joinCode = "ISO3"
nameCountryColumn = nameJoinColumn
}
matchPosnsInLookup <- match(as.character(dF[[joinCode]]),
as.character(mapWithData#data[[joinCode]]))
failedCodes <- dF[[joinCode]][is.na(matchPosnsInLookup)]
numFailedCodes <- length(failedCodes)
numMatchedCountries <- nrow(dF) - numFailedCodes
cat(numMatchedCountries, "codes from your data successfully matched countries in the map\n")
failedCountries <- dF[[nameCountryColumn]][is.na(matchPosnsInLookup)]
failedCountries <- cbind(failedCodes, failedCountries = as.character(failedCountries))
cat(numFailedCodes, "codes from your data failed to match with a country code in the map\n")
if (verbose)
print(failedCountries)
matchPosnsInUserData <- match(as.character(mapWithData#data[[joinCode]]),
as.character(dF[[joinCode]]))
codesMissingFromUserData <- as.character(mapWithData#data[[joinCode]][is.na(matchPosnsInUserData)])
countriesMissingFromUserData <- as.character(mapWithData#data[["NAME"]][is.na(matchPosnsInUserData)])
numMissingCodes <- length(codesMissingFromUserData)
cat(numMissingCodes, "codes from the map weren't represented in your data\n")
mapWithData#data <- cbind(mapWithData#data, dF[matchPosnsInUserData,
])
invisible(mapWithData)
ll[[1]] <- mapWithData # MODIFIED
ll[[2]] <- failedCountries # MODIFIED
return(ll) # MODIFIED
}
Usage:
sPDF_wfails <- joinCountryData2Map_wfails(
countryExData[,c("ISO3V10", "Country")],
joinCode = "ISO3",
nameJoinColumn = "ISO3V10",
verbose = TRUE)
# This is the result of the original function
# sPDF_wfails[[1]]
# This is info on the failed joins
sPDF_wfails[[2]]
# > failedCodes failedCountries
# > [1,] "AGO_FAIL" "Angola"

Related

Error in do.ply(i) : task 1 failed - "could not find function "%>%"" in R parallel programming

Every time I run the script it always gives me an error: Error in { : task 1 failed - "could not find function "%>%""
I already check every post on this forum and tried to apply it but no one works.
Please advise any solution.
Please note: I have only 2 cores on my PC.
My code is as follows:
library(dplyr) # For basic data manipulation
library(ncdf4) # For creating NetCDF files
library(tidync) # For easily dealing with NetCDF data
library(ggplot2) # For visualising data
library(doParallel) # For parallel processing
MHW_res_grid <- readRDS("C:/Users/SUDHANSHU KUMAR/Desktop/MTech Project/R/MHW_result.Rds")
# Function for creating arrays from data.frames
df_acast <- function(df, lon_lat){
# Force grid
res <- df %>%
right_join(lon_lat, by = c("lon", "lat")) %>%
arrange(lon, lat)
# Convert date values to integers if they are present
if(lubridate::is.Date(res[1,4])) res[,4] <- as.integer(res[,4])
# Create array
res_array <- base::array(res[,4], dim = c(length(unique(lon_lat$lon)), length(unique(lon_lat$lat))))
dimnames(res_array) <- list(lon = unique(lon_lat$lon),
lat = unique(lon_lat$lat))
return(res_array)
}
# Wrapper function for last step before data are entered into NetCDF files
df_proc <- function(df, col_choice){
# Determine the correct array dimensions
lon_step <- mean(diff(sort(unique(df$lon))))
lat_step <- mean(diff(sort(unique(df$lat))))
lon <- seq(min(df$lon), max(df$lon), by = lon_step)
lat <- seq(min(df$lat), max(df$lat), by = lat_step)
# Create full lon/lat grid
lon_lat <- expand.grid(lon = lon, lat = lat) %>%
data.frame()
# Acast only the desired column
dfa <- plyr::daply(df[c("lon", "lat", "event_no", col_choice)],
c("event_no"), df_acast, .parallel = T, lon_lat = lon_lat)
return(dfa)
}
# We must now run this function on each column of data we want to add to the NetCDF file
doParallel::registerDoParallel(cores = 2)
prep_dur <- df_proc(MHW_res_grid, "duration")
prep_max_int <- df_proc(MHW_res_grid, "intensity_max")
prep_cum_int <- df_proc(MHW_res_grid, "intensity_cumulative")
prep_peak <- df_proc(MHW_res_grid, "date_peak")

How to supply user input programmatically to an interactive function in R (specifically in the totalcensus package)

I'm writing an R script that has some interactive functions that stop the code to wait for user input. I need the script to run fully automated so that Travis-CI can build it independently. How do I supply the user input programmatically so that the code runs continuously instead of stopping for interactive input?
Specifically, I'm using the read_acs5year function from the totalcensus package in R, and when I run this code:
acs_data_2008_2012_via_totalcensus <-
read_acs5year(
year = 2012,
states = "AL",
table_contents =
"B01003",
summary_level = "tract",
with_margin = TRUE
)
it outputs this to the console:
Do you want to download data generated from decennial census 2010? This dataset is necessary for processing all summary files.
1: yes
2: no
Selection:
and then waits for user input. I want to give the function the input of 1 automatically.
As suggested by #NelsonGon you could create your own version of the function by changing the menu option
get_data <- function(year,
states,
table_contents = NULL,
areas = NULL,
geo_headers = NULL,
summary_level = NULL,
geo_comp = "total",
with_margin = FALSE,
dec_fill = NULL,
show_progress = TRUE){
### check if the path to census is set ###
if (Sys.getenv("PATH_TO_CENSUS") == ""){
message(paste(
"Please set up the path to downloaded census data",
"following the instruction at",
"https://github.com/GL-Li/totalcensus."
))
return(NULL)
}
### check whether to download data ###
path_to_census <- Sys.getenv("PATH_TO_CENSUS")
# check if need to download generated data from census2010
generated_data <- paste0(path_to_census, "/generated_data")
if (!file.exists(generated_data)){
download_generated_data()
} else {
version_file <- paste0(generated_data, "/version.txt")
if (!file.exists(version_file)){
download_generated_data()
} else {
version = readChar(version_file, 5)
if (version != "0.6.0"){
download_generated_data()
}
}
}
# check whether to download acs5year data
not_downloaded <- c()
for (st in states){
# only check for geoheader file
if (!file.exists(paste0(
path_to_census, "/acs5year/", year, "/g", year, "5",
tolower(st), ".csv"
))){
not_downloaded <- c(not_downloaded, st)
}
}
if (length(not_downloaded) > 0){
cat(paste0(
"Do you want to download ",
year,
" ACS 5-year survey summary files of states ",
paste0(not_downloaded, collapse = ", "),
" and save it to your computer? ",
"It is necessary for extracting the data."
))
if (TRUE){
download_census("acs5", year, not_downloaded)
} else {
stop("You choose not to download data.")
}
}
### read data ###
if (is.null(summary_level)) summary_level <- "*"
states <- toupper(states) # allow lowcase input
if (is.null(areas) + is.null(geo_headers) == 0){
stop("Must keep at least one of arguments areas and geo_headers NULL")
}
# add population to table contents so that it will never empty, remove it
# from table_contents if "B01003_001" is included.
if (any(grepl("B01003_001", table_contents))){
message("B01003_001 is the population column.")
}
table_contents <- table_contents[!grepl("B01003_001", table_contents)]
table_contents <- c("population = B01003_001", table_contents) %>% unique()
content_names <- organize_tablecontents(table_contents) %>% .[, name]
table_contents <- organize_tablecontents(table_contents) %>% .[, reference] %>%
toupper() # allow lowcase in reference input
# turn off warning, fread() gives warnings when read non-scii characters.
options(warn = -1)
if (!is.null(areas)){
dt <- read_acs5year_areas_(
year, states, table_contents, areas, summary_level, geo_comp,
with_margin, dec_fill, show_progress
)
} else {
geo_headers <- unique(geo_headers)
dt <- read_acs5year_geoheaders_(
year, states, table_contents, geo_headers, summary_level, geo_comp,
with_margin, dec_fill, show_progress
)
}
setnames(dt, table_contents, content_names)
if (with_margin){
setnames(dt, paste0(table_contents, "_m"),
paste0(content_names, "_margin"))
}
options(warn = 0)
return(dt)
}
and then use this nee function instead of read_acs5year. It would automatically be downloaded now without waiting for any user input.
get_data(
year = 2012,
states = "AL",
table_contents = "B01003",
summary_level = "tract",
with_margin = TRUE
)

movielense popularity recommender code with R

I'm now studying R, and now doing project about movie recommend algorithm.
I used movielense 100k data with recommenderlab library, and use these tutorials.
https://mitxpro.mit.edu/asset-v1%3AMITProfessionalX+DSx+2017_T1+type#asset+block#Module4_CS1_Movies.pdf
https://cran.r-project.org/web/packages/recommenderlab/vignettes/recommenderlab.pdf
I've now calculated sparsity, and splited data into train and test data.
And I want to make popularity recommendation code. My code is here:
install.packages("SnowballC")
install.packages("class")
install.packages("dbscan")
install.packages("proxy")
install.packages("recommenderlab")
install.packages("dplyr")
install.packages("tm")
install.packages("reshape2")
library(recommenderlab)
library(dplyr)
library(tm)
library(SnowballC)
library(class)
library(dbscan)
library(proxy)
library(reshape2)
#read data
data<- read.table('C:/Users/ginny/OneDrive/Documents/2018_1/dataanalytics/실습3/ml-100k/u.data')
#####raw data to matrix#####
data.frame2matrix = function(data, rowtitle, coltitle, datatitle,
rowdecreasing = FALSE, coldecreasing = FALSE,
default_value = NA) {
# check, whether titles exist as columns names in the data.frame data
if ( (!(rowtitle%in%names(data)))
|| (!(coltitle%in%names(data)))
|| (!(datatitle%in%names(data))) ) {
stop('data.frame2matrix: bad row-, col-, or datatitle.')
}
# get number of rows in data
ndata = dim(data)[1]
# extract rownames and colnames for the matrix from the data.frame
rownames = sort(unique(data[[rowtitle]]), decreasing = rowdecreasing)
nrows = length(rownames)
colnames = sort(unique(data[[coltitle]]), decreasing = coldecreasing)
ncols = length(colnames)
# initialize the matrix
out_matrix = matrix(NA,
nrow = nrows, ncol = ncols,
dimnames=list(rownames, colnames))
# iterate rows of data
for (i1 in 1:ndata) {
# get matrix-row and matrix-column indices for the current data-row
iR = which(rownames==data[[rowtitle]][i1])
iC = which(colnames==data[[coltitle]][i1])
# throw an error if the matrix entry (iR,iC) is already filled.
if (!is.na(out_matrix[iR, iC])) stop('data.frame2matrix: double entry in data.frame')
out_matrix[iR, iC] = data[[datatitle]][i1]
}
# set empty matrix entries to the default value
out_matrix[is.na(out_matrix)] = default_value
# return matrix
return(out_matrix)
}
#data 열 별로 이름 지정('' 안은 필요에 따라 변경 가능)
colnames(data)<-c('user_id','item_id','rating','timestamp')
#raw 데이터를 matrix로 변환
pre_data = data.frame2matrix(data, 'user_id', 'item_id', 'rating')
#matrix를 realratingmatrix로 변환
target_data<- as(as(pre_data, "matrix"), "realRatingMatrix")
data=data[,-which(names(data) %in% c("timestamp"))]
data
str(data)
summary(data)
hist(data$rating)
write.csv(data,"C:/Users/ginny/OneDrive/Documents/2018_1/dataanalytics/실습
3/u.csv")
Number_Ratings=nrow(data)
Number_Ratings
Number_Movies=length(unique(data$item_id))
Number_Movies
Number_Users=length(unique(data$user_id))
Number_Users
data1=data[data$user_id %in% names(table(data$user_id))
[table(data$user_id)>50],]
Number_Ratings1=nrow(data1)
Number_Movies1=length(unique(data1$item_id))
Number_Users1=length(unique(data1$user_id))
sparsity=((Number_Ratings1)*3*5*100)/((Number_Movies1)*(Number_Users1))
sparsity
install.packages("caTools")
library(caTools)
set.seed(10)
sample=sample.split(data1$rating, SplitRatio=0.75)
train=subset(data1, sample==TRUE)
test=subset(data1, sample==FALSE)
data2<-as.data.frame(data1)
data2
#matrix to realratingmatrix
target_data2<- as(as(pre_data2, "matrix"), "realRatingMatrix")
recommender_models<-recommenderRegistry$get_entry(dataType =
"realRatingMatrix")
recomm_model <- Recommender(data2$rating, method = "POPULAR")
I used data2 realRatingMatrix, but when I run last line, error like this happen:
Error in (function (classes, fdef, mtable) : unable to find an
inherited method for function ‘Recommender’ for signature ‘"integer"’
Can anybody help me what's wrong with it?

R Interactive Sankey Diagram + Hierarchize Nodes

I am trying to visualize sequences of events by using Sankey diagrams.
I have a set of event (Event1 to Event16) over sequences of different length.
The steps of the sequences are noted by T0, T0 - 1, T0 - 2 ...
The width of the flow is corresponding to the frequency rate of the sequences.
I would like that all the nodes corresponding to a given step to be aligned vertically.
By using the GoogleVis package I succeed to obtain the following :
Sankey with GoogleVis
As you can see some events T0-1, T0-2 and T0-3... are on the far right, instead of with the others of their time step.
It seems to be due to the fact that it is not possible to have nodes whithout children...
Do you know a way to have hierarchize nodes or/and nodes whithout children, for GoogleVis ?
If not, do you know another R package which could allow to have these characteristics for interactive plots ?
My R code is bellow. The main variable containing the sequences is a list of list, see picture.
Data containing sequences
My code :
# Package
library(googleVis)
library(dplyr)
library(reshape2)
library(tidyverse)
# Load
load("SeqCh")
# Loop -------------------------------------------------------------
# Inits
From = c()
To = c()
Freq = c()
Target = SeqCh
# Get maximum length of sequence
maxls = 0
for (kk in 1:length(Target)){
temp = length(Target[[kk]])
if (temp > maxls){
maxls = temp
}
}
# Loop on length of sequences
for (zz in 2:maxls){
# Prefix to add to manage same event repeated
if (zz == 2){
SufixFrom = "(T0)"
SufixTo = "(T0 - 1)"
} else {
SufixFrom = paste("(T0 - ", as.character(zz-2), ")", sep = "")
SufixTo = paste("(T0 - ", as.character(zz-1), ")", sep = "")
}
# Message
cat("\n")
print(paste(" Processing events from ", SufixFrom, " to ", SufixTo))
# Loop on Target
ind = lapply(Target, function(x) length(x) == zz)
TargetSub = Target[unlist(ind)]
FreqSub = Support[unlist(ind)]
for (jj in 1:length(TargetSub)){
temp = TargetSub[[jj]]
TempFrom = paste(temp[zz-1], SufixFrom, sep = " ")
TempTo = paste(temp[zz], SufixTo, sep = " ")
From = c(From, TempFrom)
To = c(To, TempTo)
Freq = c(Freq, FreqSub[jj])
}
} # end for loop on length of sequences
# All in same variable
Flows = data.frame("From" = From, "To" = To, "Occurence_Frequency" = Freq, stringsAsFactors = FALSE)
# Plot --------------------------------------------------------------------
plot(gvisSankey(Flows, from='From', to='To', weight="Occurence_Frequency",
options=list(height=900, width=1800, sankey="{link:{color:{fill:'lightblue'}}}")))
Thanks, Romain.

Calculate the length of shared boundaries between multiple polygons

I have a shapefile and I want to know for each polygon what other polygons touch it. To that end I have this code:
require("rgdal")
require("rgeos")
download.file("https://www.dropbox.com/s/vbxx9dic34qwz63/Polygons.zip?dl=1", "Polygons.zip")
Shapefile <- readOGR(".","Polygons")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
Touching_DF <- setNames(stack(lapply(Touching_List, as.character)), c("TOUCHING", "ORIGIN"))
I now want to go further and understand the extent to which each polygon touch other polygons. What I am after for each row in Touching_DF is a total length/perimeter for each ORIGIN polygon and the total length that each TOUCHING polygon is touching the origin polygon. This will then allow the percentage of the shared boundary to be calculated. I can imagine the output of this would be 3 new columns in Touching_DF (e.g. for the first row it could be something like origin parameter 1000m, touching length 500m, shared boundary 50%). Thanks.
EDIT 1
I have applied #StatnMap's answer to my real dataset. It appears that gTouches is returning results if a polygon shares both an edge and a point. These points are causing issues because they have no length. I have modified StatnMap's portion of code to deal with it, but when it comes to creating the data frame at the end there is a mismatch between how many shared edges/vertices gTouches returns and how many edges have lengths.
Here is some code to demonstrate the problem using a sample of my actual dataset:
library(rgdal)
library(rgeos)
library(sp)
library(raster)
download.file("https://www.dropbox.com/s/hsnrdfthut6klqn/Sample.zip?dl=1", "Sample.zip")
unzip("Sample.zip")
Shapefile <- readOGR(".","Sample")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
# ---- Calculate perimeters of all polygons ----
perimeters <- sp::SpatialLinesLengths(as(Shapefile, "SpatialLines"))
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
if(class(lines) != "SpatialLines"){lines <- lines#lineobj}
l_lines <- sp::SpatialLinesLengths(lines, longlat=FALSE)
results <- data.frame(origin = from,
perimeter = perimeters[from],
touching = Touching_List[[from]],
t.length = l_lines,
t.pc = 100*l_lines/perimeters[from])
results
})
This specifically shows the issue for one of the polygons:
from <- 4
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
if(class(lines) != "SpatialLines"){lines <- lines#lineobj}
l_lines <- sp::SpatialLinesLengths(lines, longlat=FALSE)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
The two possible solutions I see are 1. getting gTouches to return only shared edges with a length greater than zero or 2. returning a length of zero (rather than error) when a point rather than an edge is encountered. So far I can't find anything that will do either of these things.
EDIT 2
#StatnMap's revised solution works great. However, if a polygon does not share a snapped boarder with its neighbouring polygon (i.e. it goes to a point and then creates an island slither polygon) then it comes up with this error after lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
Error in RGEOSBinTopoFunc(spgeom1, spgeom2, byid, id, drop_lower_td, unaryUnion_if_byid_false, :
Geometry collections may not contain other geometry collections
I have been looking for a solution that is able to identify polygons with badly drawn borders and not perform any calculations and return 'NA' in res (so they can still be identified later). However, I have been unable to find a command that distinguishes these problematic polygons from 'normal' polygons.
Running #StatnMap's revised solution with these 8 polygons demonstrates the issue:
download.file("https://www.dropbox.com/s/ttg2mi2nq1gbbrq/Bad_Polygon.zip?dl=1", "Bad_Polygon.zip")
unzip("Bad_Polygon.zip")
Shapefile <- readOGR(".","Bad_Polygon")
The intersection of two polygons only touching themselves is a line. Calculating a line length is easy with functions of spatial libraries in R.
As you started your example with library sp, you'll find a proposition with this library. However, I also give you a proposition with the new library sf.
Calculate polygons shared boundaries lengths with library sp
require("rgdal")
require("rgeos")
library(sp)
library(raster)
download.file("https://www.dropbox.com/s/vbxx9dic34qwz63/Polygons.zip?dl=1", "Polygons.zip")
unzip("Polygons.zip")
Shapefile <- readOGR(".","Polygons")
Touching_List <- gTouches(Shapefile, byid = TRUE, returnDense=FALSE)
# Touching_DF <- setNames(utils::stack(lapply(Touching_List, as.character)), c("TOUCHING", "ORIGIN"))
# ---- Calculate perimeters of all polygons ----
perimeters <- sp::SpatialLinesLengths(as(Shapefile, "SpatialLines"))
# ---- Example with the first object of the list and first neighbor ----
from <- 1
to <- 1
line <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
l_line <- sp::SpatialLinesLengths(line)
plot(Shapefile[c(from, Touching_List[[from]][to]),])
plot(line, add = TRUE, col = "red", lwd = 2)
# ---- Example with the first object of the list and all neighbors ----
from <- 1
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
l_lines <- sp::SpatialLinesLengths(lines)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
l_lines <- sp::SpatialLinesLengths(lines)
res <- data.frame(origin = from,
perimeter = perimeters[from],
touching = Touching_List[[from]],
t.length = l_lines,
t.pc = 100*l_lines/perimeters[from])
res
})
# ---- Retrieve as a dataframe ----
all.length.df <- do.call("rbind", all.length.list)
In the table above, t.length is the touching length and t.pc is the touching percentage with regards to the perimeter of the polygon of origin.
Edit: Some shared boundaries are points (with sp)
As commented, some frontiers may be a unique point instead of lines. To account for this case, I suggest to double the coordinates of the point to create a line of length=0. This requires to calculate intersections with other polygons one by one, when this case appear.
For a single polygon, we can test this:
# Example with the first object of the list and all neighbours
from <- 4
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
# If lines and points, need to do it one by one to find the point
if (class(lines) == "SpatialCollections") {
list.Lines <- lapply(1:length(Touching_List[[from]]), function(to) {
line.single <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
if (class(line.single) == "SpatialPoints") {
# Double the point to create a line
L1 <- rbind(line.single#coords, line.single#coords)
rownames(L1) <- letters[1:2]
Sl1 <- Line(L1)
Lines.single <- Lines(list(Sl1), ID = as.character(to))
} else if (class(line.single) == "SpatialLines") {
Lines.single <- line.single#lines[[1]]
Lines.single#ID <- as.character(to)
}
Lines.single
})
lines <- SpatialLines(list.Lines)
}
l_lines <- sp::SpatialLinesLengths(lines)
plot(Shapefile[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1 + 1:length(Touching_List[[from]]), lwd = 2)
For all in a lapply loop:
# Corrected for point outputs: All in a lapply loop
all.length.list <- lapply(1:length(Touching_List), function(from) {
lines <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]],], byid = TRUE)
if (class(lines) == "SpatialCollections") {
list.Lines <- lapply(1:length(Touching_List[[from]]), function(to) {
line.single <- rgeos::gIntersection(Shapefile[from,], Shapefile[Touching_List[[from]][to],])
if (class(line.single) == "SpatialPoints") {
# Double the point to create a line
L1 <- rbind(line.single#coords, line.single#coords)
rownames(L1) <- letters[1:2]
Sl1 <- Line(L1)
Lines.single <- Lines(list(Sl1), ID = as.character(to))
} else if (class(line.single) == "SpatialLines") {
Lines.single <- line.single#lines[[1]]
Lines.single#ID <- as.character(to)
}
Lines.single
})
lines <- SpatialLines(list.Lines)
}
l_lines <- sp::SpatialLinesLengths(lines)
res <- data.frame(origin = from,
perimeter = perimeters[from],
touching = Touching_List[[from]],
t.length = l_lines,
t.pc = 100*l_lines/perimeters[from])
res
})
all.length.df <- do.call("rbind", all.length.list)
This may also be applied with library sf, but as you apparently chose to work with sp, I won't update the code for this part. Maybe later...
---- End of Edit ----
Calculate polygons shared boundaries lengths with library sf
Figures and outputs are the same.
library(sf)
Shapefile.sf <- st_read(".","Polygons")
# ---- Touching list ----
Touching_List <- st_touches(Shapefile.sf)
# ---- Polygons perimeters ----
perimeters <- st_length(Shapefile.sf)
# ---- Example with the first object of the list and first neighbour ----
from <- 1
to <- 1
line <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]][to],])
l_line <- st_length(line)
plot(Shapefile.sf[c(from, Touching_List[[from]][to]),])
plot(line, add = TRUE, col = "red", lwd = 2)
# ---- Example with the first object of the list and all neighbours ----
from <- 1
lines <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]],])
lines <- st_cast(lines) # In case of multiple geometries (ex. from=71)
l_lines <- st_length(lines)
plot(Shapefile.sf[c(from, Touching_List[[from]]),])
plot(lines, add = TRUE, col = 1:length(Touching_List[[from]]), lwd = 2)
# ---- All in a lapply loop ----
all.length.list <- lapply(1:length(Touching_List), function(from) {
lines <- st_intersection(Shapefile.sf[from,], Shapefile.sf[Touching_List[[from]],])
lines <- st_cast(lines) # In case of multiple geometries
l_lines <- st_length(lines)
res <- data.frame(origin = from,
perimeter = as.vector(perimeters[from]),
touching = Touching_List[[from]],
t.length = as.vector(l_lines),
t.pc = as.vector(100*l_lines/perimeters[from]))
res
})
# ---- Retrieve as dataframe ----
all.length.df <- do.call("rbind", all.length.list)
Just to add to Sébastien Rochette answer, I think function st_length from sfpackage does not work with polygons (see this post). Instead I suggest using function st_perimeter in lwgeom package.
(I wanted to comment the answer but I don't have enough reputation)

Resources