R Convert column to spatial objects - Polygon - r

I have some polygon data as an csv file with this format:
"ID","name","geometry"
"1","A","list(c(-0.25291548373789, -0.251828087559847, -0.251065989114876, -0.24944850900495, 51.4652163826438, 51.4652280432439, 51.4652040920587, 51.4650720752044))"
"2","B","list(c(-0.26291548373789, -0.211828087559847, -0.231065989114876, -0.26944850900495, 51.1652163826438, 51.2652280432439, 51.3652040920587, 51.5650720752044))"
And, when I import it to Rstudio looks like this:
Which is what I'm looking for, but the type of data is not what I need:
$ ID : num [1:2] 1 2
$ name : chr [1:2] "A" "B"
$ geometry: chr [1:2] ""list(c(-0.25291548373789, -0.2518280875598..."
What I need is this geometry column to an Spatial Object as Polygon, to be able to use it on Leaflet as:
addPolygons(data=test$geometry)
Any ideas how to achieve this?
Thanks!

test <- st_as_sf(test, wkt = 'geometry')

Related

bigrquery: Create a BigQuery table with geoJSON files doesn't work

I'd like to create a BigQuery table with geoJSON files, despite the geoJSONis an accepted format in BQ (NEWLINE_DELIMITED_JSON) and bq_fields specification, or something coercible to it (like a data frame) the function bq_table_create() of the bigrquery package doesn't work. In my example below the output error is Erro: Unsupported type: list:
library(sf)
library(bigrquery)
library(DBI)
library(googleAuthR)
library(geojsonsf)
library(geojsonR)
# Convert shapefile to geoJSON
stands_sel <- st_read(
"D:/Dropbox/Stinkbug_Ml_detection_CMPC/dashboard/v_08_CMPC/sel_stands_CMPC.shp")
# Open as geoJSON
geo <- sf_geojson(stands_sel)
# Convert geoJSON to data frame
geo_js_df <- as.data.frame(geojson_wkt(geo))
str(geo_js_df)
# 'data.frame': 2 obs. of 17 variables:
# $ SISTEMA_PR: chr "MACRO ESTACA - EUCALIPTO" "SEMENTE - EUCALIPTO"
# $ ESPECIE : chr "SALIGNA" "DUNNI"
# $ ID_UNIQUE : chr "BARBANEGRA159A" "CAMPOSECO016A"
# $ CICLO : num 2 1
# $ LOCALIDADE: chr "BARRA DO RIBEIRO" "DOM FELICIANO"
# $ ROTACAO : num 1 1
# $ CARACTER_1: chr "Produtivo" "Produtivo"
# $ VLR_AREA : num 8.53 28.07
# $ ID_REGIAO : num 11 11
# $ CD_USO_SOL: num 2433 9053
# $ DATA_PLANT: chr "2008/04/15" "2010/04/15"
# $ ID_PROJETO: chr "002" "344"
# $ CARACTERIS: chr "Plantio Comercial" "Plantio Comercial"
# $ PROJETO : chr "BARBA NEGRA" "CAMPO SECO"
# $ ESPACAMENT: chr "3.00 x 2.50" "3.5 x 2.14"
# $ CD_TALHAO : chr "159A" "016A"
# $ geometry :List of 2
# ..$ : 'wkt' chr "MULTIPOLYGON (((-51.2142 -30.3517,-51.2143 -30.3518,-51.2143 -30.3518,-51.2143 -30.3519,-51.2143 -30.3519,-51.2"| __truncated__
# ..$ : 'wkt' chr "MULTIPOLYGON (((-52.3214 -30.4271,-52.3214 -30.4272,-52.3214 -30.4272,-52.3215 -30.4272,-52.3215 -30.4272,-52.3"| __truncated__
# - attr(*, "wkt_column")= chr "geometry"
# Insert information inside BQ
bq_conn <- dbConnect(bigquery(),
project = "my-project",
use_legacy_sql = FALSE
)
# First create the table
players_table = bq_table(project = "my-project", dataset = "stands_ROI_2021", table = "CF_2021")
bq_table_create(x = players_table, fields = as_bq_fields(geo_js_df))
Erro: Unsupported type: list
You can upload data frame with a list-type column on BigQuery by using bq_table_upload() syntax. Try this on your script instead of bq_table_create(),
bq_table_upload(players_table, geo_js_df)
For your reference, I tried this on my end using this sample data with a list-type column:
d <- data.frame(id = 1:2,
name = c("Jon", "Mark"),
children = I(list(c("Mary", "James"),
c("Greta", "Sally")))
)
R console:
Created BQ table:
EDIT:
As per this documentation, FeatureCollection is not yet supported in BigQuery, however there is an ongoing feature request you can find here. Workaround is to convert the GeoJson file to BigQuery new-line-delimited JSON before converting it to dataframe.
To convert GeoJson file to BigQuery new-line-delimited JSON, follow these steps:
Install node.js.
Add packages:
npm install fs JSONStream line-input-stream yargs
Clone the github repository:
git clone https://github.com/mentin/geoscripts.git
Change directory:
cd geoscripts/geojson2bq/
Convert GeoJson file to BigQuery new-line-delimited JSON:
node geojson2bqjson.js sel_stands.geojson > out.json
Using the new-line-delimited JSON file, convert this to dataframe in the R console, then use bq_table_upload() to upload the data to BigQuery.
library(bigrquery)
library(dplyr)
library(tidyverse)
library(jsonlite)
out <- stream_in(file('out.json'))
projectid<-"my-project"
datasetid<-"my-dataset"
bq_conn <- dbConnect(bigquery(),
project = projectid,
dataset = datasetid,
use_legacy_sql = FALSE)
players_table = bq_table(project = "my-project", dataset = "my-dataset", table = "CF_2021_test5")
bq_table_upload(players_table, out)
bq_table_download(players_table)
R console:
BigQuery table:

Use purrr on a position of element in nested list?

Situation: I have a nested list in the image below. I want to use purrr to iterate over the second element of each nested list and apply a date conversion function.
Problem: I can write a for loop easily to iterate over it but I want to use this with purrr. My nested list attempts have not worked out. Normal list fine, nested by position, not fine.
Reproducible example code from Maurits Evers (Thank you!)
lst <- list(
list("one", "12345", "2019-01-01"),
list("two", "67890", "2019-01-02"))
Any assistance appreciated!
Please see the comment above to understand how to provide a reproducible example including sample data.
Since you don't provide sample data, let's create some minimal mock data similar to what is shown in your screenshot.
lst <- list(
list("one", "12345", "2019-01-01"),
list("two", "67890", "2019-01-02"))
To cast the third element of every list element as.Date we can then do
lst <- map(lst, ~{.x[[3]] <- as.Date(.x[[3]]); .x})
We can confirm that the third element of every list element is an object of type Date
str(lst)
#List of 2
# $ :List of 3
# ..$ : chr "one"
# ..$ : chr "12345"
# ..$ : Date[1:1], format: "2019-01-01"
# $ :List of 3
# ..$ : chr "two"
# ..$ : chr "67890"
# ..$ : Date[1:1], format: "2019-01-02"
Update
A more purrr/tidyverse-canonical approach would be to use modify_at (thanks #H1)
lst <- map(lst, ~modify_at(.x, 3, as.Date))
The result is the same as before.

R SpatialPointsDataFrame to SpatialLinesDataFrame

I've imported some GPS points from my Sports watch into R:
library(plotKML)
route <- readGPX("Move_Cycling.gpx")
str(route)
The data looks like this:
List of 5
$ metadata : NULL
$ bounds : NULL
$ waypoints: NULL
$ tracks :List of 1
..$ :List of 1
.. ..$ Move:'data.frame': 677 obs. of 5 variables:
.. .. ..$ lon : num [1:677] -3.8 -3.8 -3.8 -3.8 -3.8 ...
.. .. ..$ lat : num [1:677] 52.1 52.1 52.1 52.1 52.1 ...
.. .. ..$ ele : chr [1:677] "152" "151" "153" "153" ...
.. .. ..$ time : chr [1:677] "2014-06-08T09:17:08.050Z" "2014-06-08T09:17:18.680Z" "2014-06-08T09:17:23.680Z" "2014-06-08T09:17:29.680Z" ...
.. .. ..$ extensions: chr [1:677] "7627.7999992370605141521101800" "7427.6000003814697141511.7000000476837210180.8490009442642210" "9127.523.13003521531.7000000476837210181.799999952316280" "10027.534.96003841534.1999998092651410181.88300029210510" ...
$ routes : NULL
I've managed to transform to get the data points into a SpatialPointsDataFrame and to plot it over Google Earth with:
SPDF <- SpatialPointsDataFrame(coords=route$tracks[[1]]$Move[1:2],
data=route$tracks[[1]]$Move[1:2],
proj4string = CRS("+init=epsg:4326"))
plotKML(SPDF)
What I really want is the cycling track, i.e. a SpatialLinesDataFrame, but I can't work out how to set the ID field correctly to match the SpatialLines object with the data.
This is how far I've got:
tmp <- Line(coords=route$tracks[[1]]$Move[1:2])
tmp2 <- Lines(list(tmp), ID=c("coord"))
tmp3 <- SpatialLines(list(tmp2), proj4string = CRS("+init=epsg:4326"))
# result should be something like,
# but the ID of tmp3 and data don't match at the moment
SPDF <- SpatialLinesDataFrame(tmp3, data)
You can read the GPX file straight into a SpatialLinesDataFrame object with readOGR from the rgdal package. A GPX file can contain tracks, waypoints, etc and these are seen by OGR as layers in the file. So simply:
> track = readOGR("myfile.gpx","tracks")
> plot(track)
should work. You should see lines.
In your last line you've not said what your data is, but it needs to be a data frame with one row per track if you are trying to construct a SpatialLinesDataFrame from some SpatialLines and a data frame, and you can tell it not to bother matching the IDs because you don't actually have any real per-track data you are merging. So:
> SPDF = SpatialLinesDataFrame(tmp3, data.frame(who="me"),match=FALSE)
> plot(SPDF)
But if you use readOGR you don't need to go through all that. It will also read in a bit of per-track metadata from the GPX file.
Happy cycling!
As an update, here's my final solution
library(rgdal)
library(plotKML)
track <- readOGR("Move_Cycling.gpx","tracks")
plotKML(track, colour='red', width=2, labels="Cwm Rhaeadr Trail")

Sorting xts data to look like panel data in R

I need to use 'PerformanceAnalytics' package of R and to use this package, it requires me to convert the data into xts data. The data can be downloaded from this link: https://drive.google.com/file/d/0B8usDJAPeV85elBmWXFwaXB4WUE/edit?usp=sharing . Hence, I have created an xts data by using the following commands:
data<-read.csv('monthly.csv')
dataxts <- xts(data[,-1],order.by=as.Date(data$datadate,format="%d/%m/%Y"))
But after doing this, it looses the panel data structure. I tried to sort the xts data to get it back in panel data form but failed.
Can anyone please help me to reorganize the xts data to look like a panel data. I need to sort them by firm id (gvkey) and data(datadate).
xts objects are sorted by time index only. They cannot be sorted by anything else.
I would encourage you to split your data.frame into a list, by gvkey. Then convert each list element to xts and remove the columns that do not vary across time, storing them as xtsAttributes. You might also want to consider using the yearmon class, since you're dealing with monthly data.
You will have to determine how you want to encode non-numeric, time-varying values, since you cannot mix types in xts objects.
Data <- read.csv('monthly.csv', nrow=1000, as.is=TRUE)
DataList <- split(Data, Data$gvkey)
xtsList <- lapply(DataList, function(x) {
attrCol <- c("iid","tic","cusip","conm","exchg","secstat","tpci",
"cik","fic","conml","costat","idbflag","dldte")
numCol <- c("ajexm","ajpm","cshtrm","prccm","prchm","prclm",
"trfm", "trt1m", "rawpm", "rawxm", "cmth", "cshom", "cyear")
toEncode <- c("isalrt","curcdm")
y <- xts(x[,numCol], as.Date(x$datadate,format="%d/%m/%Y"))
xtsAttributes(y) <- as.list(x[1,attrCol])
y
})
Each list element is now an xts object, and is much more compact, since you do not repeat completely redundant data. And you can easily run analysis on each gvkey via lapply and friends.
> str(xtsList[["1004"]])
An ‘xts’ object on 1983-01-31/2012-12-31 containing:
Data: num [1:360, 1:13] 3.38 3.38 3.38 3.38 3.38 ...
- attr(*, "dimnames")=List of 2
..$ : NULL
..$ : chr [1:13] "ajexm" "ajpm" "cshtrm" "prccm" ...
Indexed by objects of class: [Date] TZ: UTC
xts Attributes:
List of 13
$ iid : int 1
$ tic : chr "AIR"
$ cusip : int 361105
$ conm : chr "AAR CORP"
$ exchg : int 11
$ secstat: chr "A"
$ tpci : chr "0"
$ cik : int 1750
$ fic : chr "USA"
$ conml : chr "AAR Corp"
$ costat : chr "A"
$ idbflag: chr "D"
$ dldte : chr ""
And you can access the attributes via xtsAttributes:
> xtsAttributes(xtsList[["1004"]])$fic
[1] "USA"
> xtsAttributes(xtsList[["1004"]])$tic
[1] "AIR"
An efficient way to achieve this goal is to covert the Panel Data (long format) into wide format using 'reshape2' package. After performing the estimations, convert it back to long format or panel data format. Here is an example:
library(foreign)
library(reshape2)
dd <- read.dta("DDA.dta") // DDA.dta is Stata data; keep only date, id and variable of interest (i.e. three columns in total)
wdd<-dcast(dd, datadate~gvkey) // gvkey is the id
require(PerformanceAnalytics)
wddxts <- xts(wdd[,-1],order.by=as.Date(wdd$datadate,format= "%Y-%m-%d"))
ssd60A<-rollapply(wddxts,width=60,SemiDeviation,by.column=TRUE,fill=NA) // e.g of rolling window calculation
ssd60A.df<-as.data.frame(ssd60A.xts) // convert dataframe to xts
ssd60A.df$datadate=rownames(ssd60A.df) // insert time index
lssd60A.df<-melt(ssd60A.df, id.vars=c('datadate'),var='gvkey') // convert back to panel format
write.dta(lssd60A.df,"ssd60A.dta",convert.factors = "string") // export as Stata file
Then simply merge it with the master database to perform some regression.

The variable from a netcdf file comes out flipped

I have downloaded a nc file from
f=open.ncdf("file.nc")
[1] "file Lfile.nc has 2 dimensions:"
[1] "Longitude Size: 1440"
[1] "Latitude Size: 720"
[1] "------------------------"
[1] "file filr.nc has 8 variables:"
[1] "short ts[Latitude,Longitude] Longname:Skin Temperature (2mm) Missval:NA"
I then wanted to work with the variable soil_moisture_c
A = get.var.ncdf(nc=f,varid="soil_moisture_c",verbose=TRUE)
I then plot A with image(A). I got the map shown below,I even transposed it image(t(a)) but that was changed to other direction,and not how it should be. Anyway,in order to know what is wrong,I used the netcdf viewer Panoply and the map was correctly plotted as you can see below.
The reason is that the NetCDF interface you are using is very low-level, and all you have done is read out the variable without any of its dimension information. The orientation of the grid is really arbitrary, and the coordinate information needs to be understood in a particular context.
library(raster) ## requires ncdf package for this file
d <- raster("LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T185959Z_20040114.nc", varname = "soil_moisture_c")
(I used a different file to yours, but it should work the same).
It turns out that even raster does not get this right without work, but it does make it easy to rectify:
d <- flip(t(d), direction = "x")
That transposed the data and flipped around "x" (longitude), keeping the georeferencing from the original context.
Plot it up with a map from maptools to check:
plot(d)
library(maptools)
data(wrld_simpl)
plot(wrld_simpl, add = TRUE)
There are many other ways to achieve this by reading the dimension information from the file, but this is at least a shortcut to do most of the hard work for you.
Just as a complement to #mdsumner far better solution, here is a way to do that using library ncdf only.
library(ncdf)
f <- open.ncdf("LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040101.nc")
A <- get.var.ncdf(nf,"soil_moisture_c")
All you need is to find your dimensions in order to have a coherent x and y-axis. If you look at your netCDF objects dimensions, here what you see:
str(f$dim)
List of 2
$ Longitude:List of 8
..$ name : chr "Longitude"
..$ len : int 1440
..$ unlim : logi FALSE
..$ id : int 1
..$ dimvarid : num 2
..$ units : chr "degrees_east"
..$ vals : num [1:1440(1d)] -180 -180 -179 -179 -179 ...
..$ create_dimvar: logi TRUE
..- attr(*, "class")= chr "dim.ncdf"
$ Latitude :List of 8
..$ name : chr "Latitude"
..$ len : int 720
..$ unlim : logi FALSE
..$ id : int 2
..$ dimvarid : num 1
..$ units : chr "degrees_north"
..$ vals : num [1:720(1d)] 89.9 89.6 89.4 89.1 88.9 ...
..$ create_dimvar: logi TRUE
..- attr(*, "class")= chr "dim.ncdf"
Hence your dimensions are:
f$dim$Longitude$vals -> Longitude
f$dim$Latitude$vals -> Latitude
Now your Latitude goes from 90 to -90 instead of the opposite, which image prefers, so:
Latitude <- rev(Latitude)
A <- A[nrow(A):1,]
Finally, as you noticed, the x and y of your object A are flipped so you need to transpose it, and your NA values are represented for some reasons by value -32767:
A[A==-32767] <- NA
A <- t(A)
And finally the plot:
image(Longitude, Latitude, A)
library(maptools)
data(wrld_simpl)
plot(wrld_simpl, add = TRUE)
Edit: To do that on your 31 files, let's call your vector of file names ncfiles and yourpath the directory where you stored them (for simplicity i'm going to assume your variable is always called soil_moisture_c and your NAs are always -32767):
ncfiles
[1] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040101.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040102.nc"
[3] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040103.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040104.nc"
[5] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040105.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040106.nc"
[7] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040107.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040108.nc"
[9] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040109.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040110.nc"
[11] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040111.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040112.nc"
[13] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040113.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040114.nc"
[15] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040115.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040116.nc"
[17] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040117.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040118.nc"
[19] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040119.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040120.nc"
[21] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040121.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040122.nc"
[23] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040123.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040124.nc"
[25] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040125.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040126.nc"
[27] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040127.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040128.nc"
[29] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040129.nc" "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040130.nc"
[31] "LPRM-AMSR_E_L3_D_SOILM3_V002-20120520T173800Z_20040131.nc"
yourpath
[1] "C:\\Users"
library(ncdf)
library(maptools)
data(wrld_simpl)
for(i in 1:length(ncfiles)){
f <- open.ncdf(paste(yourpath,ncfiles[i], sep="\\"))
A <- get.var.ncdf(f,"soil_moisture_c")
f$dim$Longitude$vals -> Longitude
f$dim$Latitude$vals -> Latitude
Latitude <- rev(Latitude)
A <- A[nrow(A):1,]
A[A==-32767] <- NA
A <- t(A)
close.ncdf(f) # this is the important part
png(paste(gsub("\\.nc","",ncfiles[i]), "\\.png", sep="")) # or any other device such as pdf, jpg...
image(Longitude, Latitude, A)
plot(wrld_simpl, add = TRUE)
dev.off()
}
You can also simply invert the latitudes from the command line using CDO:
cdo invertlat file.nc file_inverted.nc

Resources