GeoJson data in R - r

I want to work on GeoJson data having below mentioned format;
{ "id": 1,
"geometry":
{ "type": "Point",
"coordinates": [
-3.706,
40.3],
"properties": {"appuserid": "5b46-7d3c-48a6-9c08-cc894",
"eventtype": "location",
"devicedate": "2016-06-08T07:25:21",
"date": "2016-06-08T07:25:06.507",
"location": {
"building": "2",
"floor": "0",
"elevation": ""
}}}
The problem is i want to use a "Where" clause to "appuserid" and select the selected records for processing. I dont know how to do it ? I have already saved data from a Mongodb in a dataframe.
Right now i am trying to do it as follow;
library(sqldf)
sqldf("SELECT * FROM d WHERE d$properties$appuserid = '0000-0000-0000-0000'")
But it gives an error.
Error: Only lists of raw vectors are currently supported
code is below;
library(jsonlite);
con <- mongo(collection = "geodata", db = "MongoDb", url = "mongodb://192.168.26.18:27017", verbose = FALSE, options = ssl_options());
d <- con$find();
library(jqr)
jq(d, '.features[] | select(d$properties$appuserid == "5b46-7d3c-48a6-9c08-cc894")')
Error : Error in jq.default(d, ".features[] | select(d$properties$appuserid == \"5b46-7d3c-48a6-9c08-cc894\")") :
jq method not implemented for data.frame.

jqr is one option, an R client for jq https://stedolan.github.io/jq/
x <- '{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"population": 200
},
"geometry": {
"type": "Point",
"coordinates": [
10.724029,
59.926807
],
"properties": {
"appuserid": "5b46-7d3c-48a6-9c08-cc894"
}
}
},
{
"type": "Feature",
"properties": {
"population": 600
},
"geometry": {
"type": "Point",
"coordinates": [
10.715789,
59.904778
],
"properties": {
"appuserid": "c7e866a7-e32d-4dc2-adfd-c2ca065b25ce"
}
}
}
]
}'
library(jqr)
jq(x, '.features[] | select(.geometry.properties.appuserid == "5b46-7d3c-48a6-9c08-cc894")')
returns
{
"type": "Feature",
"properties": {
"population": 200
},
"geometry": {
"type": "Point",
"coordinates": [
10.724029,
59.926807
],
"properties": {
"appuserid": "5b46-7d3c-48a6-9c08-cc894"
}
}
}

Related

How to project values from a Gremlin traversal with nested and()/or() steps

I have the graph model below which represents the sub-pattern I'd like to traverse or fetch. The nodes and their properties are shown below as well.
The expected response to my query would look something like this:
where 's', 'c', 'aid', 'qid', 'p', 'r1', 'r2' are the nodes that make up the subpattern or subgraph.
[
{
"s": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "severity",
"type": "vertex",
"properties": {
"severity": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "High"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"c": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "cve",
"type": "vertex",
"properties": {
"cve_id": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "CVE-xxxx-xxxx"
}
],
"publishedOn": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"aid": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "aid",
"type": "vertex",
"properties": {
"aid": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"qid": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "qid",
"type": "vertex",
"properties": {
"qid": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"p": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "package",
"type": "vertex",
"properties": {
"name": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxxx"
}
],
"version": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"r1": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "release",
"type": "vertex",
"properties": {
"source": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"status": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"r2": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "release",
"type": "vertex",
"properties": {
"source": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"status": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
{
....
....
},
{
....
..
}
]
My question is how do I build my traversal query to achieve this end result?
What I have so far is this, but the project() step is not working as expected
g.V().hasLabel('cve').as('c').and(
__.in('severity').as('s'),
__.out('cve_to_aid').as('aid').and(
__.out('has_qid').as('qid'),
__.in('package_to_aid').as('p'),
or(
__.in('r1_to_aid').has('status', 'Patched').as('r1'),
__.in('r2_to_aid').has('status', 'Patched').as('r2')
)
)
).project('c', 's', 'aid', 'qid', 'p', 'r1', 'r2').
by(('c').values('cve_id')).
by(('s').values('severity')).
by(('aid').values('aid')).
by(('qid').values('qid')).
by(('p').values()).
by(('r1').values()).
by(('r2').values()).
I am doing this on CosmosDB, so please only provide answers using supported steps found here: https://learn.microsoft.com/en-us/azure/cosmos-db/gremlin/support
It is possible to nest project() steps, e.g. on the TinkerGraph:
gremlin> g = TinkerFactory.createModern().traversal()
==>graphtraversalsource[tinkergraph[vertices:6 edges:6], standard]
gremlin> g.V(1).as('x').project('x').by(
select('x').project('id', 'label','properties').by(id).by(label).by(
project('name').by(properties())
)
)
==>[x:[id:1,label:person,properties:[name:vp[name->marko]]]]
gremlin>
but then you end up coding your entire data model into your query.
In full TinkerPop you could turn your result into a subGraph() and write it to graphSon with the io() step. In Cosmos you can add the returned vertices to a TinkerGraph instance clientside and again use the io() step to serialize the TinkerGraph to graphSon.

Merge all objects inside an array that share the same key

I'm trying to deduplicate all objects inside the array results that share the same key id, and merge their path arrays.
JSON input:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/a"
},
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/b"
},
{
"id": "apple2",
"name": "appleName2",
"path": "/some/path/c"
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/a"
},
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/b"
},
{
"id": "orange2",
"name": "orangeName2",
"path": "/some/path/c"
}
]
}
]
Expected output:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
I've managed to get an approximate solution using:
jq '[{type: .[].type, results: .[].results | group_by(.id) | map({id: .[0].id, name: .[0].name, path: (map(.path))})}]'
But my solution produces two additional elements that aren't supposed to be there.
I know there are some similar questions already answered but I didn't manage to get them to work with this example. Any help is appreciated!
You could group_by the .id field, then for each group take the first item and replace its .path field with a map on the .path fields of all group members:
jq 'map(.results |= (group_by(.id) | map(first + {path: map(.path)})))'
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
Demo

Convert data to Json with all objects included

I want to convert a feature file to json so that I can pass it to a javascript function in an RMD file.
However, the toJSON function seems to flatten it and remove many of the fields and structures as below. How can I convert it and keep it in tact, as it does if I write to a file using sf::st_write?
url <- 'https://opendata.arcgis.com/api/v3/datasets/bf9d32b1aa9941af84e6c2bf0c54b1bb_0/downloads/data?format=geojson&spatialRefId=4326'
ukWardShapes <- sf::st_read(url) %>%
head(2)
# Looks OK when written out
sf::st_write(ukWardShapes, "wardShapes.geojson")
# Converting to json with toJSON seems drop other top level fields (type, name, crs) and list the objects within features object,
# but without type, and puts all fields in properties at the top level of object.
json_data <- jsonlite::toJSON(ukWardShapes)
# I want to do this as I need to pass it to javascript within an RMD like this
htmltools::tags$script(paste0("var ukWardShapes = ", json_data, ";"))
# Output from st_write - with all the objects and fields listed properly
{
"type": "FeatureCollection",
"name": "wardShapes",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "OBJECTID": 1, "WD21CD": "E05000026", "WD21NM": "Abbey", "WD21NMW": " ", "BNG_E": 544433, "BNG_N": 184376, "LONG": 0.081276, "LAT": 51.53981, "SHAPE_Length": 0.071473941285613768, "SHAPE_Area": 0.00015225110241064838 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.093628520000038, 51.53767283600007 ], [ 0.08163128800004, 51.539165094000055 ], [ 0.085507102000065, 51.537043160000053 ], [ 0.075954208000041, 51.533595714000057 ], [ 0.07333983500007, 51.537621201000036 ], [ 0.068771363000053, 51.536206993000064 ], [ 0.068303699000069, 51.544253423000043 ], [ 0.068361695000021, 51.544390390000046 ], [ 0.08006389600007, 51.544772356000067 ], [ 0.093628520000038, 51.53767283600007 ] ] ] ] } },
{ "type": "Feature", "properties": { "OBJECTID": 2, "WD21CD": "E05000027", "WD21NM": "Alibon", "WD21NMW": " ", "BNG_E": 549247, "BNG_N": 185196, "LONG": 0.150987, "LAT": 51.545921, "SHAPE_Length": 0.074652046036690151, "SHAPE_Area": 0.00017418950412786572 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.161601914000073, 51.543327754000074 ], [ 0.147931795000034, 51.541598449000048 ], [ 0.140256898000075, 51.54111542000004 ], [ 0.13420572800004, 51.540716652000071 ], [ 0.131925236000029, 51.543763455000033 ], [ 0.14633003900002, 51.546332889000041 ], [ 0.142816723000067, 51.550973604000035 ], [ 0.156378253000071, 51.551020271000027 ], [ 0.161601914000073, 51.543327754000074 ] ] ] ] } }
]
}
# Output from toJson which seems to have a lot of structure removed. Note, I'm not
# concerned about it being pretty and separated into lines
[{
"OBJECTID":1, "WD21CD":"E05000026", "WD21NM":"Abbey", "WD21NMW":" ", "BNG_E":544433, "BNG_N":184376, "LONG":0.0813, "LAT":51.5398, "SHAPE_Length":0.0715, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.0936, 51.5377], [0.0816, 51.5392], [0.0855, 51.537], [0.076, 51.5336], [0.0733, 51.5376], [0.0688, 51.5362], [0.0683, 51.5443], [0.0684, 51.5444], [0.0801, 51.5448], [0.0936, 51.5377]]]]
}
}, {
"OBJECTID":2, "WD21CD":"E05000027", "WD21NM":"Alibon", "WD21NMW":" ", "BNG_E":549247, "BNG_N":185196, "LONG":0.151, "LAT":51.5459, "SHAPE_Length":0.0747, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.1616, 51.5433], [0.1479, 51.5416], [0.1403, 51.5411], [0.1342, 51.5407], [0.1319, 51.5438], [0.1463, 51.5463], [0.1428, 51.551], [0.1564, 51.551], [0.1616, 51.5433]]]]
}
}]
As per #SymbolixAU's comment above, the answer is to use
geojsonsf::sf_geojson() instead of jsonlite::toJSON() as geojson is a specific structure of JSON for spatial data and it needs a specific parser for it.
So my line of code should be:
json_data <- geojsonsf::sf_geojson(ukWardShapes)

How to convert JSON data to tidy format in R

I never have worked with json data in R and unfortunately, I was sent a sample of data as:
{
"task_id": "104",
"status": "succeeded",
"metrics": {
"requests_made": 2,
"network_errors": 0,
"unique_locations_visited": 0,
"requests_queued": 0,
"queue_items_completed": 2,
"queue_items_waiting": 0,
"issue_events": 9,
"caption": "",
"progress": 100
},
"message": "",
"issue_events": [
{
"id": "1234",
"type": "issue_found",
"issue": {
"name": "policy not enforced",
"type_index": 123456789,
"serial_number": "123456789183923712",
"origin": "https://test.com",
"path": "/robots.txt",
"severity": "low",
"confidence": "certain",
"caption": "/robots.txt",
"evidence": [
{
"type": "FirstOrderEvidence",
"detail": {
"band_flags": [
"in_band"
]
},
"request_response": {
"url": "https://test.com/robots.txt",
"request": [
{
"type": "DataSegment",
"data": "jaghsdjgasdgaskjdgasdgashdgsahdgasjkdgh==",
"length": 313
}
],
"response": [
{
"type": "DataSegment",
"data": "asudasjdgasaaasgdasgaksjdhgasjdgkjghKGKGgKJgKJgKJGKgh==",
"length": 303
}
],
"was_redirect_followed": false,
"request_time": "1234567890"
}
}
],
"internal_data": "jdfhgjhJHkjhdskfhkjhjs0sajkdfhKHKhkj=="
}
},
{
"id": "1235",
"type": "issue_found",
"issue": {
"name": "certificate",
"type_index": 12345845684,
"serial_number": "123456789165637150",
"origin": "https://test.com",
"path": "/",
"severity": "info",
"confidence": "certain",
"description": "The server description a valid, trusted certificate. This issue is purely informational.<br><br>The server presented the following certificates:<br><br><h4>Server certificate</h4><table><tr><td><b>Issued to:</b> </td><td>test.ie, test.com, www.test.com, www.test.ie</td></tr><tr><td><b>Issued by:</b> </td><td>GeoTrust EV RSA CA 2018</td></tr><tr><td><b>Valid from:</b> </td><td>Tue May 12 00:00:00 UTC 2020</td></tr><tr><td><b>Valid to:</b> </td><td>Tue May 17 12:00:00 UTC 2022</td></tr></table><h4>Certificate chain #1</h4><table><tr><td><b>Issued to:</b> </td><td>GeoTrust EV RSA CA 2018</td></tr><tr><td><b>Issued by:</b> </td><td> High Assurance EV Root CA</td></tr><tr><td><b>Valid from:</b> </td><td>Mon Nov 06 12:22:46 UTC 2017</td></tr><tr><td><b>Valid to:</b> </td><td>Sat Nov 06 12:22:46 UTC 2027</td></tr></table><h4>Certificate chain #2</h4><table><tr><td><b>Issued to:</b> </td><td> High Assurance EV Root CA</td></tr><tr><td><b>Issued by:</b> </td><td> High Assurance EV Root CA</td></tr><tr><td><b>Valid from:</b> </td><td>Fri Nov 10 00:00:00 UTC 2006</td></tr><tr><td><b>Valid to:</b> </td><td>Mon Nov 10 00:00:00 UTC 2031</td></tr></table>",
"caption": "/",
"evidence": [],
"internal_data": "sjhdgsajdggJGJHgjfgjhGJHgjhsdgfgjhGJHGjhsdgfjhsgfdsjfg098867hjhgJHGJHG=="
}
},
{
"id": "1236",
"type": "issue_found",
"issue": {
"name": "without flag set",
"type_index": 1254392,
"serial_number": "12345678965616",
"origin": "https://test.com",
"path": "/robots.txt",
"severity": "info",
"confidence": "certain",
"description": "my description text here....",
"caption": "/robots.txt",
"evidence": [
{
"type": "InformationListEvidence",
"request_response": {
"url": "https://test.com/robots.txt",
"request": [
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh==",
"length": 313
}
],
"response": [
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh=",
"length": 161
},
{
"type": "HighlightSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdf=",
"length": 119
},
{
"type": "DataSegment",
"data": "AasjkdhasjkhkjHKJSDHFJKSDFHKhjkHSKADJFHKhjkhjkh=",
"length": 23
}
],
"was_redirect_followed": false,
"request_time": "178454751191465"
},
"information_items": [
"Other: user_id"
]
}
],
"internal_data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKH=="
}
},
{
"id": "1237",
"type": "issue_found",
"issue": {
"name": "without flag set",
"type_index": 1234567,
"serial_number": "123456789056704",
"origin": "https://test.com",
"path": "/",
"severity": "info",
"confidence": "certain",
"description": "long description here zjkhasdjkh hsajkdhsajkd hasjkdhbsjkdash d",
"caption": "/",
"evidence": [
{
"type": "InformationListEvidence",
"request_response": {
"url": "https://test.com/",
"request": [
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfhsfdsfdsfdsfdsfdsfsdfdsf",
"length": 303
}
],
"response": [
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh==",
"length": 151
},
{
"type": "HighlightSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh=",
"length": 119
},
{
"type": "DataSegment",
"data": "sdfdsfsdfSDFSDFdSFDS546SDFSDFDSFG657=",
"length": 23
}
],
"was_redirect_followed": false,
"request_time": "123541191466"
},
"information_items": [
"Other: user_id"
]
}
],
"internal_data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsd=="
}
},
{
"id": "1238",
"type": "issue_found",
"issue": {
"name": "parameter pollution",
"type_index": 4137000,
"serial_number": "123456789810290176",
"origin": "https://test.com",
"path": "/robots.txt",
"severity": "low",
"confidence": "firm",
"description": "very long description text here...",
"caption": "/robots.txt [URL path filename]",
"evidence": [
{
"type": "FirstOrderEvidence",
"detail": {
"payload": {
"bytes": "Q3jkeiZkcmg8MQ==",
"flags": 0
},
"band_flags": [
"in_band"
]
},
"request_response": {
"url": "https://test.com/%3fhdz%26drh%3d1",
"request": [
{
"type": "DataSegment",
"data": "W1QOIC8=",
"length": 5
},
{
"type": "HighlightSegment",
"data": "WRMnBGR6JTI2ZHJoJTNkMQ==",
"length": 16
},
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfhcvxxcvklxcvjkxclvjxclkvjxcklvjlxckjvlxckjvklxcjvxcklvjxcklvjxckljvlxckjvxcklvjxckljvxcklvjcklxjvcxkl==",
"length": 298
}
],
"response": [
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh==",
"length": 130
},
{
"type": "HighlightSegment",
"data": "Q4jleiZkcmg9MQ==",
"length": 10
},
{
"type": "DataSegment",
"data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh==",
"length": 163
}
],
"was_redirect_followed": false,
"request_time": "51"
}
}
],
"internal_data": "adjkhajksdhaskjdhkjHKJHjkhaskjdhkjasdhKHKJHkjsdhfkjsdhfkjsdhKHJKHjksdfhsdjkfhksdjhKHKJHJKhsdkfjhsdkfjhKHJKHjksdkfjhsdkjfhKHKJHjkhsdkfjhsdkjfhsdjkfhksdjfhKJHKjksdhfsdjkfhksdjfhsdkjhKHJKhsdkfhsdkjfhsdkfhdskjhKHKjhsdfkjhsdjkfh="
}
}
],
"event_logs": [],
"audit_items": []
}
I read it in R using jsonlite:
df_orig <- fromJSON('dast_sample_output.json', flatten= T)
This gives a nested list type R object. I wish to convert this list to a data frame in a tidy format with all the arrays and sub arrays being unnested.
If you run the str(df_orig), you could see the nested data frames in there.
How do I convert it to tidy format?
I tried unnest(), purrr but struggling to get into the tidy format for analysis? Any pointers would be highly appreciated.
Cheers,
use the jsonlite package function fromJSON()
edit:
set option flatten=T
edit2:
use content( x, 'text') before flattening
here is a full example converting to data.table:
get.json <- GET( apicall.text )
get.json.text <- content( get.json , 'text')
get.json.flat <- fromJSON( get.json.text , flatten = T)
dt <- as.data.table( get.json.flat )

R sf: extract nested geoJSON features nested inside a JSON

I have a JSON file that has geoJSON feature collections nested inside of it.
Is it possible to read in the JSON file using jsonlite::read_json(), extract the geoJSON bits, and then convert the resulting list to a sf object? The alternative is to write the list back to JSON (text) and read the geoJSON using a package like geojsonio.
This is what my JSON code looks like:
{
"all": [
{
"type": "Feature",
"geometry": {
"type": "GeometryCollection",
"geometries": [
{
"type": "Point",
"coordinates": [
-75.155727,
39.956318
]
},{
"type": "LineString",
"coordinates": [
[
-75.15567895337301,
39.95653558798881
],[
-75.15575995337292,
39.95616931624319
]
]
},{
"type": "Point",
"coordinates": [
-75.15566,
39.956432
]
}
]
},
"properties": {
# properties
}
},{
# more features of mixed type
}
]
}
perhaps
x <- '{
"all": [
{
"type": "Feature",
"geometry": {
"type": "GeometryCollection",
"geometries": [
{
"type": "Point",
"coordinates": [
-75.155727,
39.956318
]
},{
"type": "LineString",
"coordinates": [
[
-75.15567895337301,
39.95653558798881
],[
-75.15575995337292,
39.95616931624319
]
]
},{
"type": "Point",
"coordinates": [
-75.15566,
39.956432
]
}
]
},
"properties": null
}
]
}'
sf::st_read(jqr::jq(x, ".all[]"))
(string edited to be valid JSON)

Resources