Convert data to JSON with all objects included in R

I want to convert a feature file to JSON so that I can pass it to a JavaScript function in an Rmd file.
However, the toJSON function seems to flatten it and remove many of the fields and structures, as shown below. How can I convert it and keep it intact, as it is when I write it to a file using sf::st_write?
url <- 'https://opendata.arcgis.com/api/v3/datasets/bf9d32b1aa9941af84e6c2bf0c54b1bb_0/downloads/data?format=geojson&spatialRefId=4326'
library(magrittr)  # for the %>% pipe
ukWardShapes <- sf::st_read(url) %>%
  head(2)
# Looks OK when written out
sf::st_write(ukWardShapes, "wardShapes.geojson")
# Converting with toJSON seems to drop the other top-level fields (type, name, crs),
# lists the objects that were inside the features object but without their type,
# and puts all the fields from properties at the top level of each object.
json_data <- jsonlite::toJSON(ukWardShapes)
# I want to do this because I need to pass it to JavaScript within an Rmd like this
htmltools::tags$script(paste0("var ukWardShapes = ", json_data, ";"))
# Output from st_write - with all the objects and fields listed properly
{
"type": "FeatureCollection",
"name": "wardShapes",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "OBJECTID": 1, "WD21CD": "E05000026", "WD21NM": "Abbey", "WD21NMW": " ", "BNG_E": 544433, "BNG_N": 184376, "LONG": 0.081276, "LAT": 51.53981, "SHAPE_Length": 0.071473941285613768, "SHAPE_Area": 0.00015225110241064838 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.093628520000038, 51.53767283600007 ], [ 0.08163128800004, 51.539165094000055 ], [ 0.085507102000065, 51.537043160000053 ], [ 0.075954208000041, 51.533595714000057 ], [ 0.07333983500007, 51.537621201000036 ], [ 0.068771363000053, 51.536206993000064 ], [ 0.068303699000069, 51.544253423000043 ], [ 0.068361695000021, 51.544390390000046 ], [ 0.08006389600007, 51.544772356000067 ], [ 0.093628520000038, 51.53767283600007 ] ] ] ] } },
{ "type": "Feature", "properties": { "OBJECTID": 2, "WD21CD": "E05000027", "WD21NM": "Alibon", "WD21NMW": " ", "BNG_E": 549247, "BNG_N": 185196, "LONG": 0.150987, "LAT": 51.545921, "SHAPE_Length": 0.074652046036690151, "SHAPE_Area": 0.00017418950412786572 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.161601914000073, 51.543327754000074 ], [ 0.147931795000034, 51.541598449000048 ], [ 0.140256898000075, 51.54111542000004 ], [ 0.13420572800004, 51.540716652000071 ], [ 0.131925236000029, 51.543763455000033 ], [ 0.14633003900002, 51.546332889000041 ], [ 0.142816723000067, 51.550973604000035 ], [ 0.156378253000071, 51.551020271000027 ], [ 0.161601914000073, 51.543327754000074 ] ] ] ] } }
]
}
# Output from toJSON, which seems to have a lot of structure removed. Note, I'm not
# concerned about it being pretty and separated into lines.
[{
"OBJECTID":1, "WD21CD":"E05000026", "WD21NM":"Abbey", "WD21NMW":" ", "BNG_E":544433, "BNG_N":184376, "LONG":0.0813, "LAT":51.5398, "SHAPE_Length":0.0715, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.0936, 51.5377], [0.0816, 51.5392], [0.0855, 51.537], [0.076, 51.5336], [0.0733, 51.5376], [0.0688, 51.5362], [0.0683, 51.5443], [0.0684, 51.5444], [0.0801, 51.5448], [0.0936, 51.5377]]]]
}
}, {
"OBJECTID":2, "WD21CD":"E05000027", "WD21NM":"Alibon", "WD21NMW":" ", "BNG_E":549247, "BNG_N":185196, "LONG":0.151, "LAT":51.5459, "SHAPE_Length":0.0747, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.1616, 51.5433], [0.1479, 51.5416], [0.1403, 51.5411], [0.1342, 51.5407], [0.1319, 51.5438], [0.1463, 51.5463], [0.1428, 51.551], [0.1564, 51.551], [0.1616, 51.5433]]]]
}
}]

As per @SymbolixAU's comment above, the answer is to use
geojsonsf::sf_geojson() instead of jsonlite::toJSON(), as GeoJSON is a specific structure of JSON for spatial data and needs a specific parser.
So my line of code should be:
json_data <- geojsonsf::sf_geojson(ukWardShapes)
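Putting it together, a minimal sketch of the full flow inside the Rmd (same variable names as above; how the script tag is emitted depends on your chunk options, which is an assumption here):

library(magrittr)  # for the %>% pipe

url <- 'https://opendata.arcgis.com/api/v3/datasets/bf9d32b1aa9941af84e6c2bf0c54b1bb_0/downloads/data?format=geojson&spatialRefId=4326'
ukWardShapes <- sf::st_read(url) %>%
  head(2)

# sf_geojson() keeps the FeatureCollection structure instead of flattening it
json_data <- geojsonsf::sf_geojson(ukWardShapes)

# Embed in the page so the JavaScript code can see the data
htmltools::tags$script(paste0("var ukWardShapes = ", json_data, ";"))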

Related

jq array filter for nested array elements

I am trying to add a new user to the JSON below for the policy item whose group matches NP01-RW. I am able to do it without selecting on NP01-RW, but I am not able to select the users under NP01-RW and return the updated JSON.
{
"id": 181,
"guid": "c9b7dbde-63de-42cc-9840-1b4a06e13364",
"isEnabled": true,
"version": 17,
"service": "Np-Hue",
"name": "DATASCIENCE-CUROPT-RO",
"policyType": 0,
"policyPriority": 0,
"isAuditEnabled": true,
"resources": {
"database": {
"values": [
"hive_cur_acct_1dev",
"hive_cur_acct_1eng",
"hive_cur_acct_1rwy",
"hive_cur_acct_1stg",
"hive_opt_acct_1dev",
"hive_opt_acct_1eng",
"hive_opt_acct_1stg",
"hive_opt_acct_1rwy"
],
"isExcludes": false,
"isRecursive": false
},
"column": {
"values": [
"*"
],
"isExcludes": false,
"isRecursive": false
},
"table": {
"values": [
"*"
],
"isExcludes": false,
"isRecursive": false
}
},
"policyItems": [
{
"accesses": [
{
"type": "select",
"isAllowed": true
},
{
"type": "update",
"isAllowed": true
},
{
"type": "create",
"isAllowed": true
},
{
"type": "drop",
"isAllowed": true
},
{
"type": "alter",
"isAllowed": true
},
{
"type": "index",
"isAllowed": true
},
{
"type": "lock",
"isAllowed": true
},
{
"type": "all",
"isAllowed": true
},
{
"type": "read",
"isAllowed": true
},
{
"type": "write",
"isAllowed": true
}
],
"users": [
"user1",
"user2",
"user3"
],
"groups": [
"NP01-RW"
],
"conditions": [],
"delegateAdmin": false
},
{
"accesses": [
{
"type": "select",
"isAllowed": true
}
],
"users": [
"user1"
],
"groups": [
"NP01-RO"
],
"conditions": [],
"delegateAdmin": false
}
],
"denyPolicyItems": [],
"allowExceptions": [],
"denyExceptions": [],
"dataMaskPolicyItems": [],
"rowFilterPolicyItems": [],
"options": {},
"validitySchedules": [],
"policyLabels": [
"DATASCIENCE-CurOpt-RO_NP01"
]
}
Below is what I have tried, but it returns only the part of the JSON matching NP01-RW and not the full JSON:
jq --arg username "$sync_userName" '.policyItems[] | select(.groups[] | IN("NP01-RO")).users += [$username]' > ${sync_policyName}.json
Operator precedence in jq is not always intuitive. Your program is parsed as:
.policyItems[] | (select(.groups[] | IN("NP01-RO")).users += [$username])
Which first streams all policyItems and only then changes them, leaving you with policyItems only in the output.
You need to make sure that the stream selects the correct values, which you can then assign:
(.policyItems[] | select(.groups[] | IN("NP01-RO")).users) += [$username]
This will do the assignment, but still return the full input (.).
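If you would rather run the corrected filter from R than from the shell, here is a minimal sketch using the jqr package (it appears in other answers on this page). The file name policy.json and the username value are assumptions, and sprintf() is used to splice the username into the filter string:

library(jqr)

policy_json <- paste(readLines("policy.json"), collapse = "\n")  # hypothetical input file
new_user <- "user4"                                              # hypothetical username

# Parenthesising the path expression makes += update the whole document,
# so the full input (.) is returned rather than just the policy items
filter <- sprintf(
  '(.policyItems[] | select(.groups[] | IN("NP01-RO")).users) += ["%s"]',
  new_user
)

updated <- jq(policy_json, filter)
writeLines(as.character(updated), "updated_policy.json")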

Merge all objects inside an array that share the same key

I'm trying to deduplicate all objects inside the array results that share the same key id, and merge their path arrays.
JSON input:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/a"
},
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/b"
},
{
"id": "apple2",
"name": "appleName2",
"path": "/some/path/c"
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/a"
},
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/b"
},
{
"id": "orange2",
"name": "orangeName2",
"path": "/some/path/c"
}
]
}
]
Expected output:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
I've managed to get an approximate solution using:
jq '[{type: .[].type, results: .[].results | group_by(.id) | map({id: .[0].id, name: .[0].name, path: (map(.path))})}]'
But my solution produces two additional elements that aren't supposed to be there.
I know there are some similar questions already answered but I didn't manage to get them to work with this example. Any help is appreciated!
You could group_by the .id field, then for each group take the first item and replace its .path field with a map on the .path fields of all group members:
jq 'map(.results |= (group_by(.id) | map(first + {path: map(.path)})))'
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
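If you want to stay in R for this (as in the other questions on this page), a minimal sketch running the same filter through the jqr package could look like the following; the input file name is an assumption:

library(jqr)

json_in <- paste(readLines("input.json"), collapse = "\n")  # hypothetical input file

merged <- jq(json_in, 'map(.results |= (group_by(.id) | map(first + {path: map(.path)})))')
cat(as.character(merged))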

R sf: extract geoJSON features nested inside a JSON

I have a JSON file that has geoJSON feature collections nested inside of it.
Is it possible to read in the JSON file using jsonlite::read_json(), extract the GeoJSON bits, and then convert the resulting list to an sf object? The alternative is to write the list back to JSON (text) and read the GeoJSON using a package like geojsonio.
This is what my JSON looks like:
{
"all": [
{
"type": "Feature",
"geometry": {
"type": "GeometryCollection",
"geometries": [
{
"type": "Point",
"coordinates": [
-75.155727,
39.956318
]
},{
"type": "LineString",
"coordinates": [
[
-75.15567895337301,
39.95653558798881
],[
-75.15575995337292,
39.95616931624319
]
]
},{
"type": "Point",
"coordinates": [
-75.15566,
39.956432
]
}
]
},
"properties": {
# properties
}
},{
# more features of mixed type
}
]
}
perhaps
x <- '{
"all": [
{
"type": "Feature",
"geometry": {
"type": "GeometryCollection",
"geometries": [
{
"type": "Point",
"coordinates": [
-75.155727,
39.956318
]
},{
"type": "LineString",
"coordinates": [
[
-75.15567895337301,
39.95653558798881
],[
-75.15575995337292,
39.95616931624319
]
]
},{
"type": "Point",
"coordinates": [
-75.15566,
39.956432
]
}
]
},
"properties": null
}
]
}'
sf::st_read(jqr::jq(x, ".all[]"))
(string edited to be valid JSON)
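If the JSON lives in a file rather than in a string, the same approach works after reading the file in as text; a minimal sketch, with the file name nested.json as an assumption:

library(jqr)

txt <- paste(readLines("nested.json"), collapse = "\n")  # hypothetical file
shapes <- sf::st_read(as.character(jq(txt, ".all[]")))
shapes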

Why does Google Analytics return a different number of results with the same parameters?

Reporting API v4
I am a developer. I have my client's Google AdWords and Analytics accounts, and I have been using the AdWords and Analytics reporting APIs for almost a year now.
I am also using the Query Explorer, https://ga-dev-tools.appspot.com/query-explorer/, to compare whether I have retrieved the right amount of data.
I don't know if it's an error or not, but it's acting weird.
Try number 1, using https://ga-dev-tools.appspot.com/query-explorer/
I tried to add 2 metrics and 7 dimensions. This account contains 1 million rows in only 1 month; I know this because I retrieved 1 million rows for the range July 25, 2018 to August 16, 2018.
Then here's the interesting part. I run the query again with the same parameters and it retrieves 5999 results. I run it again and it returns 1 million. The results keep changing. I thought it was an error in my code, but it's also happening in the query builder.
What do you think, is it a bug or not? You can try this yourself if you have more than a million rows of data.
I know it's not strictly related to coding, but Google Analytics doesn't have its own forums, just like AdWords.
Try number 2, using this link: https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet
This is my request:
{
"reportRequests": [
{
"dateRanges": [
{
"endDate": "2018-08-16",
"startDate": "2018-07-16"
}
],
"dimensions": [
{
"name": "ga:dimension2"
},
{
"name": "ga:dimension3"
},
{
"name": "ga:dimension1"
},
{
"name": "ga:adPlacementDomain"
}
],
"pageSize": 5,
"viewId": "********",
"samplingLevel": "LARGE",
"metrics": [
{
"expression": "ga:entrances"
},
{
"expression": "ga:newUsers"
}
],
"includeEmptyRows": true
}
]
}
The returned rowCount is sometimes 2111 and sometimes 1000000.
This is my response JSON with the 1 million-row result:
{
"reports": [
{
"columnHeader": {
"dimensions": [
"ga:dimension2",
"ga:dimension3",
"ga:dimension1",
"ga:adPlacementDomain"
],
"metricHeader": {
"metricHeaderEntries": [
{
"name": "ga:entrances",
"type": "INTEGER"
},
{
"name": "ga:newUsers",
"type": "INTEGER"
}
]
}
},
"data": {
"rows": [
{
"dimensions": [
"(other)",
"(other)",
"(other)",
"(other)"
],
"metrics": [
{
"values": [
"120834",
"68730"
]
}
]
},
{
"dimensions": [
"1000025873.1532426892",
"1532426891790.o9z84x",
"2018-07-24T11:08:15.449+01:00",
"unknown"
],
"metrics": [
{
"values": [
"0",
"0"
]
}
]
},
{
"dimensions": [
"1000025873.1532426892",
"1532426891790.o9z84x",
"2018-07-24T11:08:17.589+01:00",
"unknown"
],
"metrics": [
{
"values": [
"0",
"0"
]
}
]
},
{
"dimensions": [
"1000025873.1532426892",
"1532426891790.o9z84x",
"2018-07-24T11:08:31.809+01:00",
"unknown"
],
"metrics": [
{
"values": [
"0",
"0"
]
}
]
},
{
"dimensions": [
"1000025873.1532426892",
"1532427045552.p38pk78",
"2018-07-24T11:09:06.43+01:00",
"unknown"
],
"metrics": [
{
"values": [
"0",
"0"
]
}
]
}
],
"totals": [
{
"values": [
"158626",
"90225"
]
}
],
"rowCount": 1000000,
"minimums": [
{
"values": [
"0",
"0"
]
}
],
"maximums": [
{
"values": [
"120834",
"68730"
]
}
],
"isDataGolden": true
},
"nextPageToken": "5"
}
]
}
Another example response, when I get fewer than 1 million results:
{
"reports": [
{
"columnHeader": {
"dimensions": [
"ga:dimension2",
"ga:dimension3",
"ga:dimension1",
"ga:adPlacementDomain"
],
"metricHeader": {
"metricHeaderEntries": [
{
"name": "ga:entrances",
"type": "INTEGER"
},
{
"name": "ga:newUsers",
"type": "INTEGER"
}
]
}
},
"data": {
"rows": [
{
"dimensions": [
"1002211166.1531434756",
"1531762918308.fjnj7pa6",
"2018-07-16T18:41:58.307+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
},
{
"dimensions": [
"1002211166.1531434756",
"1531771001486.jawfrpz8",
"2018-07-16T20:56:41.482+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
},
{
"dimensions": [
"1002211166.1531434756",
"1531772475507.7n4w2qzb",
"2018-07-16T21:21:15.503+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
},
{
"dimensions": [
"1002211166.1531434756",
"1531859165986.zl7we6a5",
"2018-07-17T21:26:05.977+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
},
{
"dimensions": [
"1002211166.1531434756",
"1531859632678.dz7hccsa",
"2018-07-17T21:33:52.673+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
},
{
"dimensions": [
"1002211166.1531434756",
"1531861026792.kw71ngx9",
"2018-07-17T21:42:31.667+01:00",
"mobileapp::2-com.forsbit.spider"
],
"metrics": [
{
"values": [
"1",
"0"
]
}
]
}
],
"totals": [
{
"values": [
"2111",
"233"
]
}
],
"rowCount": 2112,
"minimums": [
{
"values": [
"0",
"0"
]
}
],
"maximums": [
{
"values": [
"1",
"1"
]
}
],
"isDataGolden": true
},
"nextPageToken": "6"
}
]
}
I am assuming that you have kept all the queries intact; double-check just to make sure.
The second step would be to check for sampling. Look at the fields samplingSpaceSizes and samplesReadCounts in the response. If these fields are not defined, no sampling was introduced.
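As a quick way to verify this from R, here is a minimal sketch, assuming the batchGet response has been saved to response.json (the file name is an assumption; samplingSpaceSizes and samplesReadCounts are the report-data fields mentioned above):

library(jsonlite)

resp <- fromJSON("response.json", simplifyVector = FALSE)  # hypothetical saved response
report <- resp$reports[[1]]

# If neither field is present, the report was not sampled
is_sampled <- !is.null(report$data$samplingSpaceSizes) ||
  !is.null(report$data$samplesReadCounts)
is_sampled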

GeoJSON data in R

I want to work with GeoJSON data in the format shown below:
{ "id": 1,
"geometry":
{ "type": "Point",
"coordinates": [
-3.706,
40.3],
"properties": {"appuserid": "5b46-7d3c-48a6-9c08-cc894",
"eventtype": "location",
"devicedate": "2016-06-08T07:25:21",
"date": "2016-06-08T07:25:06.507",
"location": {
"building": "2",
"floor": "0",
"elevation": ""
}}}
The problem is that I want to use a "where" clause on "appuserid" and select the matching records for processing. I don't know how to do it. I have already saved the data from MongoDB into a data frame.
Right now I am trying to do it as follows:
library(sqldf)
sqldf("SELECT * FROM d WHERE d$properties$appuserid = '0000-0000-0000-0000'")
But it gives an error.
Error: Only lists of raw vectors are currently supported
The code is below:
library(jsonlite);
con <- mongo(collection = "geodata", db = "MongoDb", url = "mongodb://192.168.26.18:27017", verbose = FALSE, options = ssl_options());
d <- con$find();
library(jqr)
jq(d, '.features[] | select(d$properties$appuserid == "5b46-7d3c-48a6-9c08-cc894")')
Error : Error in jq.default(d, ".features[] | select(d$properties$appuserid == \"5b46-7d3c-48a6-9c08-cc894\")") :
jq method not implemented for data.frame.
jqr is one option; it is an R client for jq (https://stedolan.github.io/jq/).
x <- '{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"population": 200
},
"geometry": {
"type": "Point",
"coordinates": [
10.724029,
59.926807
],
"properties": {
"appuserid": "5b46-7d3c-48a6-9c08-cc894"
}
}
},
{
"type": "Feature",
"properties": {
"population": 600
},
"geometry": {
"type": "Point",
"coordinates": [
10.715789,
59.904778
],
"properties": {
"appuserid": "c7e866a7-e32d-4dc2-adfd-c2ca065b25ce"
}
}
}
]
}'
library(jqr)
jq(x, '.features[] | select(.geometry.properties.appuserid == "5b46-7d3c-48a6-9c08-cc894")')
returns
{
"type": "Feature",
"properties": {
"population": 200
},
"geometry": {
"type": "Point",
"coordinates": [
10.724029,
59.926807
],
"properties": {
"appuserid": "5b46-7d3c-48a6-9c08-cc894"
}
}
}
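If you then need the selected feature as a spatial object rather than raw JSON, it can be passed straight to sf::st_read(), as in the nested-GeoJSON answer above. A sketch, reusing the x and the filter from this answer:

library(jqr)

feature <- jq(x, '.features[] | select(.geometry.properties.appuserid == "5b46-7d3c-48a6-9c08-cc894")')
sf::st_read(as.character(feature))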

Resources