I want to dissolve all polygons but one from a shape file. Is there a way to do this?
Here is a reproducible example:
library(rgeos)
library(UScensus2000tract)
# load data
data("oregon.tract")
# plot map
plot(oregon.tract)
# Dissolve all polygons
d <- gUnaryUnion(oregon.tract, id = oregon.tract#data$state)
plot(d)
In this example, is it possible to dissolve the polygons by keep the tract number 9501 ?
I assume this is what you are looking for. This is slightly different if you want to merge contiguous members of the tract together, but all you would have to do is remove the first element (the entire state) from the polygon, and then run gUnaryUnion on the remainder, and then re-add the contiguous tract members to a copy of the gUnaryUnion-ized state.
oregon = oregon.tract
names(attributes(oregon.tract))
#[1] "bbox" "proj4string" "polygons" "plotOrder" "data"
#[6] "class"
selected_tract_indices = which(oregon.tract#data$tract == 9501)
oregon <- gUnaryUnion(oregon.tract, id = oregon.tract#data$state)
d = oregon
npolygons = 1
for (selected_tract_index in selected_tract_indices){
d#polygons[[npolygons+1]] = oregon.tract#polygons[[selected_tract_index]]
npolygons = npolygons + 1
d#plotOrder=c(d#plotOrder,as.integer(npolygons))
}
plot(d)
The output of this operation is a SpatialPolygon. In case you want to convert it back to a SpatialPolygonDataDrame, here is a simple way to do it:
# Extract polygon ID's
( did <- sapply(slot(d, "polygons"), function(x) slot(x, "ID")) )
# Create dataframe with correct rownames
( d.df <- data.frame( ID=1:length(d), row.names = did) )
# Try coersion again and check class
d <- SpatialPolygonsDataFrame(d, d.df)
class(d)
Related
I have a set of coordinates of the locations of different individuals, and another set of coordinates of different drop off boxes, for their ballots. I'm trying to find the distance between their residence, and the nearest dropbox. I've attached a copy of the code I have to work through that as of now--it was replicated from another stack overflow example. However, it is not too efficient, as the dataset I'm working with is millions of rows, and the code relies on finding all possible combinations of coordinates, and then pulling the least distance. Is there a more efficient way to deal with this?
What I currently have:
# Made-Up Data
library(geosphere)
library(tidyverse)
geo_voters <- data.frame(voter_id = c(12345, 45678, 89011)
long=c(-43.17536, -43.17411, -43.36605),
lat=c(-22.95414, -22.9302, -23.00133))
geo_dropoff_boxes <- data.frame(long=c(-43.19155, -43.33636, -67.45666),
lat=c(-22.90353, -22.87253, -26,78901))
# Code to find the distance between voters, and the dropoff boxes
# Order into a newdf as needed first.
# First, the voters:
voter_addresses <- data.frame(voter_id = as.character(geo_voters$voter_id),
lon_address = geo_voters$long,
lat_address = geo_voters$lat
)
# Second, the polling locations:
polling_address <- data.frame(place_number = 1:nrow(geo_dropoff_boxes),
lon_place = geo_dropoff_boxes$long,
lat_place = geo_dropoff_boxes$lat
)
# Create nested dfs:
voter_nest <- nest(voter_addresses, -voter_id, .key = 'voter_coords')
polling_nest <- nest(polling_address, -place_number, .key = 'polling_coords')
# Combine for combinations:
data_master <- crossing(voter_nest, polling_nest)
# Calculate shortest distance:
shortest_dist <- data_master %>%
mutate(dist = map2_dbl(voter_coords, polling_coords, distm)) %>%
group_by(voter_id) %>%
filter(dist == min(dist)) %>%
mutate(dist_km = dist/1000,
voter_id = as.character(voter_id)) %>%
select(voter_id, dist_km)
The sf package makes this simple. The st_as_sf() function converts data frame of lat-long values to georeferenced points, and the st_distance() function calculates the distances between them. When running st_as_sf(), you'll need to specify a coordinate reference system. It looks like you're using latitude and longitude, so I specify crs="epsg:4326", which is the most common latitude/longitude reference.
library( sf )
geo_voters <- data.frame(voter_id = c(12345, 45678, 89011)
long=c(-43.17536, -43.17411, -43.36605),
lat=c(-22.95414, -22.9302, -23.00133))
geo_dropoff_boxes <- data.frame(long=c(-43.19155, -43.33636, -67.45666),
lat=c(-22.90353, -22.87253, -26.78901))
# convert the data to sf features
geo_voters = st_as_sf( geo_voters, coords=c('long', 'lat'), crs="epsg:4326" )
geo_dropoff_boxes = st_as_sf( geo_dropoff_boxes, coords=c('long', 'lat'), crs="epsg:4326" )
# calculate the distances between voters and drop boxes
dist = st_distance( geo_voters, geo_dropoff_boxes )
print(dist)
Now each row represents a voter and each column represents their distance to a drop box (in meters):
Units: [m]
[,1] [,2] [,3]
[1,] 5866.745 18821.87 2482400
[2,] 3461.945 17813.57 2483210
[3,] 20916.618 14641.09 2462186
I have a shp file. I want to get the names of neighboring counties in all regions according to the latitude and longitude in the file. I found that some regions obviously have neighboring counties, but I didn’t get the neighboring counties when I ran the code. I don't know what was wrong.
library(tidyverse)
library(plyr)
library(sf)
library(readxl)
> county <-st_read('D:/county.shp',stringsAsFactors = FALSE)
> neighbor_counties <- function(subcounty){
name <- st_touches(subcounty, county)
county[unlist(name), ]$NAME
}
> output <- vector("list", nrow(county))
> names(output) <- county$NAME
> for (i in seq_len(nrow(county))) {
output[[i]] <- suppressWarnings(neighbor_counties(county[i,]))
}
> output
> head(output)
> neighbor <- output %>%
ldply(data.frame) %>%
set_names("ori_county", "neighbor_county")
Your example is not exactly reproducible, but we are lucky to have the nc.shp shapefile that ships with {sf} available.
So consider this code; it is built on sf::st_touches() function, with the county shapefile passed as argument twice (once for the touching counties, and once for the counties being touched). Sparse = TRUE makes it return a list of indexes of neighboring counties.
To find names of neighbors of a particular county you need to know the index of the county of interest, and then subset the list of neighbors accordingly. You will get indices of the neighboring counties.
As for the second part of your question (expressed in comments) = how to get from a list of indices to a data frame of neighbors - I suggest creating a function returning a data frame, and then applying it via purrr::map_dfr() to the vector of indices as starting points; consider the code provided and amend as necessary. It should give you a start...
library(sf)
shape <- st_read(system.file("shape/nc.shp", package="sf")) # included with sf package
# a list of neighbors
neighbors <- st_touches(shape, # first
shape, # second
sparse = T)
# neighbors of County Mecklenburg (as in Charlotte of Mecklenburg-Strelitz)
# index of Mecklenburg cnty
idx_strelitz <- which(shape$NAME == 'Mecklenburg')
# index of neighbors of Mecklenburg cnty
nbr_mecklenburg <- neighbors[idx_strelitz][[1]]
# names of neighbours of cnty Meckl.
shape$NAME[nbr_mecklenburg]
# [1] "Iredell" "Lincoln" "Cabarrus" "Gaston" "Union"
# a visual check
plot(st_geometry(shape))
plot(shape[idx_strelitz, ], col = "blue", add = T)
plot(shape[nbr_mecklenburg,], col = "red", add = T)
# second question: get pairs of names as a data frame
# a function returning data frame of neighbors of a given cnty
nbr_pairs <- function(idx) {
data.frame(ori_county = rep(shape$NAME[idx], length(neighbors[[idx]])),
neighbor_county = shape$NAME[neighbors[[idx]]])
}
# check - cnty Mecklemburg
nbr_pairs(idx_strelitz)
# ori_county neighbor_county
# 1 Mecklenburg Iredell
# 2 Mecklenburg Lincoln
# 3 Mecklenburg Cabarrus
# 4 Mecklenburg Gaston
# 5 Mecklenburg Union
# apply to list of indices
pairs_of_names <- purrr::map_dfr(seq_along(neighbors),
nbr_pairs)
I want to assign country Exclusive Economic Zones to point data from a raster where the points represent Aragonite saturation levels in the ocean.
The raster is a single layer that gives an Aragonite value for many latitude/longitude points in the ocean.
I want to assign each latitude/longitude point to an exclusive economic zone.
This site does it for single pairs of coordinates but I have 15,000 points so I am hoping it is possible to do in R.
The data look like this:
long lat Aragonite
1 20.89833 84.66917 1.542071
2 22.69496 84.66917 1.538187
3 24.49159 84.66917 1.537830
4 26.28822 84.66917 1.534834
5 28.08485 84.66917 1.534595
6 29.88148 84.66917 1.532505
Previously I have used the below code to assign countries to raster points but this gives NA back for many of the points in the ocean that are within national EEZ's.
#convert the raster to points for assigning countries
r.pts <- rasterToPoints(r, spatial = TRUE)
#view new proj 4 string of spatialpointsdataframe
proj4string(r.pts)
##[1] "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
###converting reclassified points to countries
# The single argument to this function, points, is a data.frame in which:
# - column 1 contains the longitude in degrees
# - column 2 contains the latitude in degrees
coords2country = function(r.pts)
{
countriesSP <- getMap(resolution='high')
#countriesSP <- getMap(resolution='high') #you could use high res map from rworldxtra if you were concerned about detail
#setting CRS directly to that from rworldmap
r.pts = SpatialPoints(r.pts, proj4string=CRS(proj4string(countriesSP)))
# use 'over' to get indices of the Polygons object containing each point
indices = over(r.pts, countriesSP)
# return the ADMIN names of each country
indices$ADMIN
#indices$ISO3 # returns the ISO3 code
#indices$continent # returns the continent (6 continent model)
#indices$REGION # returns the continent (7 continent model)
}
#get country names for each pair of points
rCountries <- coords2country(r.pts)
Is there any way to do a similar function to coords2countries but for EEZ's in the ocean?
EDIT: some data for reproducible example
dput(head(r.pts))
structure(list(layer = c(5, 5, 5, 5, 5, 5), x = c(-178.311660375408,-176.511660375408, -174.711660375408, -172.911660375408, -171.111660375408,-169.311660375408), y = c(73.1088933113454, 73.1088933113454,73.1088933113454, 73.1088933113454, 73.1088933113454, 73.1088933113454),.Names = c("layer", "x", "y"),row.names = c(NA, 6L), class = "data.frame")
You need a shapefile that includes the EEZs. Download the one that is here: World EEZ v9 (2016-10-21, 123 MB) http://www.marineregions.org/downloads.php#marbound
You can load the EEZ shapefile with the readOGR() function from the rgdal package. Unzip the EEZ shapefile zip to your working directory and run countriesSP <- rgdal::readOGR(dsn = ".", layer = "eez_boundaries") in place of countriesSP <- getMap(resolution='high')
None of the example data you provided falls in a country's EEZ, so I can't tell if this actually works, but it probably should...
library(sp)
library(rworldmap)
library(rgeos)
r <- read.table(header = TRUE, text = "
long lat Aragonite
1 20.89833 84.66917 1.542071
2 22.69496 84.66917 1.538187
3 24.49159 84.66917 1.537830
4 26.28822 84.66917 1.534834
5 28.08485 84.66917 1.534595
6 29.88148 84.66917 1.532505
")
# or
#r <- data.frame(long = c(-178.311660375408,-176.511660375408, -174.711660375408, -172.911660375408, -171.111660375408,-169.311660375408),
# lat = c(73.1088933113454, 73.1088933113454,73.1088933113454, 73.1088933113454, 73.1088933113454, 73.1088933113454))
r.pts <- sp::SpatialPoints(r)
# download file from here: http://www.marineregions.org/download_file.php?fn=v9_20161021
# put the zip file in your working directory: getwd()
unzip('World_EEZ_v9_20161021.zip')
# countriesSP <- rworldmap::getMap(resolution = "high")
# or
countriesSP <- rgdal::readOGR(dsn = ".", layer = "eez_boundaries")
r.pts <- sp::SpatialPoints(r.pts, proj4string = sp::CRS(proj4string(countriesSP)))
indices <- over(r.pts, countriesSP)
indices$ADMIN
I want to dissolve some polygons, and I am doing the following:
Batching in the shapefile (DA.shp - sensitive information hence first two sample records and only first three columns shown)
Batching in csv file called zone.csv that has the information for dissolving joining the zone.csv to DA (first five records shown due to sensitivity)
Dissolving the joined shapefile
Creating row IDs to make the dissolved shapefile into a polygondataframe for export.
It all goes smoothly, however, I want to carry the Zed and Criteria fields in my dissolved polygon, like one can using GIS. I have tried to search in vain, so any help will be appreciated.
library(rgeos)
library(rgdal)
library(sp)
# set working directory
wd <- setwd("c:/Personal/R")
# read DA shapefile
da <- readOGR(wd, "DA")
plot(da)
crs.shp <- proj4string(da)
da#data[1:2,1:3] # check first two records
OBJECTID DAUID CDUID
0 3 35204831 3520
1 5 35180720 3518
# batchin text file with zone numbers
zones.csv <- read.csv(file="c:/personal/R/Variant1.csv", header=TRUE, sep=",")
zones.csv$DAUID <- as.character(zones.csv$DAUID) # make DAUID as character for join
zones.csv[1:5,]
DAUID zed Criteria
1 35140110 3102 GGHM zones
2 35140111 3102 GGHM zones
3 35140112 3102 GGHM zones
4 35140113 3102 GGHM zones
5 35140114 3102 GGHM zones
da1 <- da # save a copy
da1#data$DAUID <- as.character(da1#data$DAUID) # make character field for join
da1#data <- merge(da1#data, zones.csv, by.x = "DAUID", by.y = "DAUID", all.x=T, sort=F)
# Now dissolve
zone.shp <- gUnaryUnion(da1, id = da1#data$zed.x)
plot(zone.shp)
# extract zone Id's to make dataframe
Gid <- sapply(slot(zone.shp, "polygons"), function(x) slot(x, "ID"))
# Create dataframe with correct rownames
z.df <- data.frame( ID=1:length(zone.shp), row.names = Gid)
# make Polygondataframe to export as shapefile
zone.shp.pdf <- SpatialPolygonsDataFrame(zone.shp, data=z.df)
zone.shp.pdf#data$crit <-
proj4string(zone.shp.pdf) <- CRS(proj4string(da))
Here is a self-contained reproducible example with some SpatialPolygons:
libarary(raster)
p <- shapefile(system.file("external/lux.shp", package="raster"))
Create a data.frame, and so on.
Anyway, I think you can use raster::aggregate to solve your problem. Below is a simplified and improved script, but I cannot check if it works as I do not have your data.
library(raster)
da <- shapefile("c:/Personal/DA.shp")
zones <- read.csv("c:/personal/R/Variant1.csv", stringsAsFactors=FALSE)
da1 <- merge(da, zones, by="DAUID", all.x=TRUE)
# Now dissolve
zone.shp <- aggregate(da1, c('zed', 'Criteria'))
If you want to write this to a shapefile:
shapefile(zone.shp, 'file.shp')
I have problems by merging two dataframes with different length.
To make it as easy as possible the datasets:
Dataset A - Persons
http://pastebin.com/HbaeqACi
Dataset B - Waterfeatures:
http://pastebin.com/UdDvNtHs
Dataset C - City:
http://pastebin.com/nATnkMRk
I have some R-Code , which is not relevant for my problem, but I will paste it completely, so you have exactly the same situation:
require(fossil)
library(fossil)
#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("city.csv", header =TRUE)
#### calculate distance
# Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
#Generate a function that looks for the closest waterfeature for each id coordinates and calculate/save the distance
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
disnw <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
disnw <- min(disnw)
disnw <- data.frame(disnw, WaterFeature=tmp)
return(disnw)
}
# apply distance calculation function to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))
Now I want to copy the calculated distance to the city dataset. I've tried to use merge (both datasets have the city attribute) and the persons only contains the cities from the city dataset.
city_all_parameters = city
city_all_parameters = merge(city_all_parameters, persons[,c("city", "disnw")], all=TRUE)
Unfortunately this is not the outcome, which I want to have. I have 164 rows, but I only want to have these 5 rows + the variable disnw and it's corresponding value.
I've tried it out with rbind as well, but there I get the error:
"Error in rbind(deparse.level, ...) : numbers of columns of arguments do not match"
Any tip, how to solve this problem?
Your code works as you intended, but I wanted to show you a more elegant way to do it in base. I have commented the code:
library(fossil)
# If you want to use pastebin, you can make it easy to load in for us like this:
# But I recommend using dput(persons) and pasting the results in.
persons = read.csv("http://pastebin.com/raw.php?i=HbaeqACi", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("http://pastebin.com/raw.php?i=UdDvNtHs", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("http://pastebin.com/raw.php?i=nATnkMRk", header =TRUE)
# Use column names instead of column indices to clarify your code
UniqueCoordinates <- data.frame(unique(persons[,c('POINT_X','POINT_Y')]))
# I didn't understand why you wanted to format the Id,
# but you don't need the Id in this code
# UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)
# Instead of calculating the pairwise distance between all
# the water points everytime, use deg.dist with mapply:
UniqueCoordinates$disnw <- mapply(function(x,y) min(deg.dist(long1=x,lat1=y,
long2=water$POINT_X,
lat2=water$POINT_Y)),
UniqueCoordinates$POINT_X,
UniqueCoordinates$POINT_Y)
persons <- merge(UniqueCoordinates,persons)
# I think this is what you wanted...
unique(persons[c('city','disnw')])
# city disnw
# 1 City E 6.4865635
# 20 City A 1.6604204
# 69 City B 0.9893909
# 113 City D 0.6001968
# 148 City C 0.5308953
# If you want to merge to the city
merge(persons,city,by='city')