how to parse nested key value pairs in R - r

i am trying to parse a log file which contains structure in the form of key value pairs.
log <- c("name:praveen,age:23,place:UP,address:,dob:, site: {site_name:something , site_url: http://something.com, description:}")
I am trying to parse this line. I have done some work, but I have two main problems here.
1: How can I parse the "site" variable (shown above), given that the site key itself contains multiple key:value pairs?
2: How do I handle the case where the separator appears inside a value? The key:value separator is a colon (:), but the "site" key contains the pair site_url:http://something.com, whose URL also contains a colon (:), and this gives the wrong answer.
This is my code. It does not contain the "site" key because I don't know how to parse it.
# Sample record: comma-separated key:value pairs, some with empty values.
log <- c("name:praveen,age:23,place:UP,address:,dob:")
# Known keys mapped to their column position in the result.
names <- setNames(1:5,c("name","age","place","address","dob"))

# Place the values of one record at the positions of their keys.
#
# x:     list of character vectors, one per key:value pair, each being the
#        pair split on ":" (key first).
# names: named integer vector; its names are the known keys and its values
#        the output positions.
#
# Returns a vector of length(names) holding the value of each key found in
# x, "nothing" for a key whose value is empty, and NA for keys not in x.
# NOTE: the function name shadows base::assign within this script.
assign <- function(x, names){
  key_value <- vapply(x, function(i) {
    if (length(i) >= 2L) {
      # A value that itself contains ":" (e.g. a URL) was split into several
      # pieces; rejoin them so every pair is exactly key + value.
      c(i[1], paste(i[-1], collapse = ":"))
    } else {
      c(i, "nothing")
    }
  }, character(2))
  z <- rep(NA, length(names))
  z[names[key_value[1, ]]] <- key_value[2, ]
  z
}

split_by_comma <- strsplit(log, ",")
split_by_colon <- lapply(split_by_comma, strsplit, ":")
# One row per record, one column per known key.
ret <- t(sapply(split_by_colon, assign, names))
colnames(ret) <- names(names)
ret
please help me to parse this file thank you
i have updated with actual log file format.
{
"username": "lavita",
"host": "10.105.22.32",
"event_source": "server",
"event_type": "/courses/IITB/CS101/2014_T1/xblock/i4x:;_;_IITB;_CS101;_video;_d333fa637a074b41996dc2fd5e675818/handler/xmodule_handler/save_user_state",
"context": {
"course_id": "IITB/CS101/2014_T1",
"course_user_tags": {},
"user_id": 42,
"org_id": "IITB"
},
"time": "2014-06-20T05:49:10.468638+00:00",
"ip": "127.0.0.1",
"event": "{\"POST\": {\"saved_video_position\": [\"00:02:10\"]}, \"GET\": {}}",
"agent": "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:18.0) Gecko/20100101 Firefox/18.0",
"page": null
}
{
"username": "raeha",
"host": "10.105.22.32",
"event_source": "server",
"event_type": "problem_check",
"context": {
"course_id": "IITB/CS101/2014_T1",
"course_user_tags": {},
"user_id": 40,
"org_id": "IITB",
"module": {
"display_name": ""
}
},
"time": "2014-06-20T06:43:52.716455+00:00",
"ip": "127.0.0.1",
"event": {
"submission": {
"i4x-IITB-CS101-problem-33e4aac93dc84f368c93b1d08fa984fc_2_1": {
"input_type": "choicegroup",
"question": "",
"response_type": "multiplechoiceresponse",
"answer": "MenuInflater.inflate()",
"variant": "",
"correct": true
}
},
"success": "correct",
"grade": 1,
"correct_map": {
"i4x-IITB-CS101-problem-33e4aac93dc84f368c93b1d08fa984fc_2_1": {
"hint": "",
"hintmode": null,
"correctness": "correct",
"npoints": null,
"msg": "",
"queuestate": null
}
},
"state": {
"student_answers": {},
"seed": 1,
"done": null,
"correct_map": {},
"input_state": {
"i4x-IITB-CS101-problem-33e4aac93dc84f368c93b1d08fa984fc_2_1": {}
}
},
"answers": {
"i4x-IITB-CS101-problem-33e4aac93dc84f368c93b1d08fa984fc_2_1": "choice_0"
},
"attempts": 1,
"max_grade": 1,
"problem_id": "i4x://IITB/CS101/problem/33e4aac93dc84f368c93b1d08fa984fc"
},
"agent": "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:29.0) Gecko/20100101 Firefox/29.0",
"page": "x_module"
}
{
"username": "tushars",
"host": "localhost",
"event_source": "server",
"event_type": "/courses/IITB/CS101/2014_T1/instructor_dashboard/api/list_instructor_tasks",
"context": {
"course_id": "IITB/CS101/2014_T1",
"course_user_tags": {},
"user_id": 6,
"org_id": "IITB"
},
"time": "2014-06-20T05:49:26.780244+00:00",
"ip": "127.0.0.1",
"event": "{\"POST\": {}, \"GET\": {}}",
"agent": "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:29.0) Gecko/20100101 Firefox/29.0",
"page": null
}

This is a pretty ugly format. True json would have quoted strings and non-empty values so it's not really a standard format. Here's a method that's equally as ugly, but it can handle multiple nested elements.
I'll use this as a test case
# Test input: braces nested two levels deep (site > description), empty
# values (address, dob) and a URL value that itself contains ":".
log <- paste0("name:{first:praveen,last:smith},age:23,place:UP,address:,",
"dob:, site: {site_name:something , site_url: http://something.com, ",
"description:{english:woot,spanish:wooto}}")
And here's the parser
# Parse a string of comma-separated key:value pairs, where a value may be a
# "{...}" block of further pairs, into a nested named list.
#
# log: a single character string.
#
# Returns a named list; each value is a character string with whitespace
# removed (possibly ""), or a nested list for a "{...}" block. Only the first
# ":" of a pair separates key from value, so values may contain ":".
parseString <- function(log) {
  stopifnot(is.character(log), length(log) == 1L)

  # Pass 1: repeatedly pull out ONE innermost {...} block (no braces inside)
  # and replace it with the placeholder ~~k~~, k being its index in `nested`.
  # Taking a single match per iteration gives every block its own
  # placeholder, even when several sibling blocks are present at once.
  block_pattern <- "\\{[^}{]+\\}"
  nested <- character(0)
  repeat {
    m <- regexpr(block_pattern, log)
    if (m[1] == -1L) {
      break
    }
    block <- regmatches(log, m)
    nested <- c(nested, gsub("^\\{|\\}$", "", block))
    regmatches(log, m) <- paste0("~~", length(nested), "~~")
  }
  # The remaining brace-free string is the top level; it goes last.
  pieces <- c(nested, log)

  # Pass 2: turn every piece into a named list. A piece can only reference
  # placeholders with a smaller index (blocks extracted before it), so those
  # sub-lists are already built when they are looked up.
  parsed <- vector("list", length(pieces))
  for (i in seq_along(pieces)) {
    pairs <- strsplit(strsplit(pieces[i], "\\s*,\\s*")[[1]], ":")
    keys <- vapply(pairs, function(p) p[1], character(1))
    vals <- vapply(pairs, function(p) paste(p[-1], collapse = ":"),
                   character(1))
    keys <- gsub("\\s+", "", keys)
    vals <- gsub("\\s+", "", vals)
    entry <- setNames(as.list(vals), keys)
    ref <- regexec("~~(\\d+)~~", vals)
    has_ref <- vapply(ref, function(r) r[1] != -1L, logical(1))
    for (j in which(has_ref)) {
      id <- as.numeric(regmatches(vals[j], ref[j])[[1]][2])
      entry[[j]] <- parsed[[id]]
    }
    parsed[[i]] <- entry
  }
  parsed[[length(parsed)]]
}
So the strategy is to find the "{}" blocks and collapse them down to a simple string we can find again later; in this case I use "~~1~~", where the number in the middle is a unique ID for each block. I do this until I only have a flat set of name-value pairs. Then I go back, look for all the "~~" values and merge the correct sublist back in. For this test data, I get
#parseString(log)
$name
$name$first
[1] "praveen"
$name$last
[1] "smith"
$age
[1] "23"
$place
[1] "UP"
$address
[1] ""
$dob
[1] ""
$site
$site$site_name
[1] "something"
$site$site_url
[1] "http://something.com"
$site$description
$site$description$english
[1] "woot"
$site$description$spanish
[1] "wooto"

Related

Parsing json file with jq from HPE iLO

I have a json file pulled from an HPE iLO interface with the snmp configuration. It looks like:
[
{
"Comments": {
"BIOSDate": "01/23/2021",
"BIOSFamily": "U30",
"Manufacturer": "HPE",
"Model": "ProLiant DL380 Gen10",
"SerialNumber": "5UNESX378",
"iLOVersion": "iLO 5 v2.65"
}
},
{
"#HpeiLOSnmpService.v2_3_0.HpeiLOSnmpService": {
"/redfish/v1/Managers/1/SnmpService/": {
"#odata.context": "/redfish/v1/$metadata#HpeiLOSnmpService.HpeIloSnmpService",
"#odata.id": "/redfish/v1/Managers/1/SnmpService",
"Actions": {
"#HpeIloSnmpService.SendSNMPTestAlert": {
"target": "/redfish/v1/Managers/1/SnmpService/Actions/HpeILOSnmpService.SendSNMPTestAlert/"
}
},
"AlertDestinationAssociations": [
{
"SNMPAlertProtocol": "SNMPv3Trap",
"SecurityName": null
}
],
"AlertDestinations": [
"1.2.3.4",
"5.6.7.8",
null,
null
],
"AlertsEnabled": true,
"Name": "SnmpService"
},
"PeriodicHSATrapConfig": "Disabled",
"ReadCommunities": [
"",
"",
""
],
"Role": "",
"RoleDetail": "",
"SNMPAlertDestinations": {
"#odata.id": "/redfish/v1/Managers/1/SnmpService/SNMPAlertDestinations/"
},
"SNMPUsers": {
"#odata.id": "/redfish/v1/Managers/1/SnmpService/SNMPUsers/"
},
"SNMPv1Enabled": false,
"SNMPv3EngineID": "0x8920000000E3028329E002033",
"SNMPv3InformRetryAttempt": 2,
"SNMPv3InformRetryIntervalSeconds": 15,
"Status": {
"State": "Enabled"
},
"TrapCommunities": [
"",
"",
"",
"",
"",
"",
""
],
"TrapSourceHostname": "Manager",
"Users": [
{
"AuthProtocol": "MD5",
"PrivacyProtocol": "DES",
"SecurityName": "",
"UserEngineID": null
},
{
"AuthProtocol": "MD5",
"PrivacyProtocol": "DES",
"SecurityName": "",
"UserEngineID": null
},
{
"AuthProtocol": "SHA",
"PrivacyProtocol": "AES",
"SecurityName": "oneview_4849283d97929392",
"UserEngineID": null
},
{
"AuthProtocol": "MD5",
"PrivacyProtocol": "DES",
"SecurityName": "",
"UserEngineID": null
}
]
}
}
]
I want to select an element in the Users array that has SecurityName set to "" and change that element. I don't need the Comments portion. So, I try to select the section starting with #HpeiLOSnmpService.v2_3_0.HpeiLOSnmpService with:
jq -r '.[] | .#HpeiLOSnmpService.v2_3_0.HpeiLOSnmpService' snmp.json
but it gives me everything without the enclosing array. Anyone have a suggestion?
Thanks!
# starts a comment and your jq program degenerates to .[]|. which is identical to the program .[] (|. is a no-op/the identity filter). This program will simply select all values from the input.
You must quote certain characters, such as #, when they are part of property names. The following will work with your JSON file:
# The key contains "#" and dots, so the WHOLE key must be quoted as one string.
jq -r '.[] | ."#HpeiLOSnmpService.v2_3_0.HpeiLOSnmpService"' snmp.json
Thanks. Using the quotes around the key with a '#' works. Ultimately, selecting the SecurityName that was unset was done with:
jq -r '.Users[] | select (.SecurityName == "") | {"AuthProtocol":.AuthProtocol,"PrivacyProtocol":.PrivacyProtocol,"SecurityName":.SecurityName,"UserEngineID":.UserEngineID}'

R - JSON Structure for POST

I've read everything I can find about creating JSON using toJSON(), but it's just not clicking. Any guidance would be greatly appreciated.
I am trying to create a JSON for a POST request to an API. I'm trying to create the following structure:
{
"configuration": {
"Id": 1,
"OptionIds": [
261,
263,
533122,
228
]
},
"length": 15000,
"zip": "92691",
"Date": "2019-09-11T15:46:31.354Z"
}
Breaking it down element by element, I can get close, but I can never reach the final solution once I attempt to combine everything.
For example.
# Named list with a single element: the option ids as a numeric vector.
v_opt_ids <- list(OptionsIds = c(261,
263,
533122,
228))
# Convert the list to JSON and print the resulting text.
cat(jsonlite::toJSON(v_opt_ids, pretty=T))
Produces:
{
"OptionsIds": [261, 263, 533122, 228]
}
And while this is not exactly what i'm aiming for in that portion, when combined with additional elements.
# Combining the single vehicleId with the four option ids in a data.frame
# yields one row per option id (vehicleId repeated), which is why the JSON
# shown below becomes an array of four objects.
config <- data.frame(
vehicleId = c(444449),
v_opt_ids)
cat(jsonlite::toJSON(list(configuration = config), pretty=T))
Things go awry pretty quickly..
{
"configuration": [
{
"vehicleId": 444449,
"OptionsIds": 261
},
{
"vehicleId": 444449,
"OptionsIds": 263
},
{
"vehicleId": 444449,
"OptionsIds": 533122
},
{
"vehicleId": 444449,
"OptionsIds": 228
}
]
}
Any guidance would be greatly appreciated.
Use nested lists:
# Mirror the target JSON with nested lists: each JSON object is a named
# list, and the JSON array is a plain vector.
L <- list(
configuration = list(
Id = 1,
OptionIds = c(261, 263, 533122, 228)
),
length = 15000,
zip = "92691",
Date = "2019-09-11T15:46:31.354Z"
)
# auto_unbox = TRUE writes length-one values as scalars ("Id": 1).
jsonlite::toJSON(L, pretty = TRUE, auto_unbox = TRUE)
# {
# "configuration": {
# "Id": 1,
# "OptionIds": [261, 263, 533122, 228]
# },
# "length": 15000,
# "zip": "92691",
# "Date": "2019-09-11T15:46:31.354Z"
# }
(pretty=TRUE is not required for the JSON; I am just using it to match your expected output. auto_unbox=TRUE ensures that one gets "Id":1 instead of "Id":[1]. The two are functionally identical for most things that consume JSON, but some find the unboxed form preferable.)

Avoid unboxing for vectors with 1 value

For an API I wish to push data to, I need to avoid unboxing to happen on specific values.
Consider the following input:
library(jsonlite)
# Three property descriptions. lsA holds a single value in Values, lsB holds
# three, and lsC has scalar min/max fields instead of Values.
lsA <- list(propertyName = "listA",
Values = c("x"))
lsB <- list(propertyName = "listB",
Values = c("a","b","c"))
lsC <- list(propertyName = "listC",
min = 1,
max = 3)
I want my output to be like this:
[
{
"propertyName": "listA",
"Values": ["x"]
},
{
"propertyName": "listB",
"Values": ["a", "b", "c"]
},
{
"propertyName": "listC",
"min": 1,
"max": 3
}
]
However, when I do this:
# Collect the three property lists into one list and convert it to JSON.
lsTest <- list()
# NOTE(review): list.append is not defined in this snippet and only jsonlite
# is loaded above; presumably rlist::list.append. Confirm.
# I() is applied to lsA as a whole; the output below shows that its Values
# element is still unboxed.
lsTest <- list.append(lsTest,I(lsA),lsB,lsC)
jsonTest <- jsonlite::toJSON(lsTest,auto_unbox = TRUE, pretty = TRUE)
jsonTest
I'm getting this (notice the unboxed value for listA):
[
{
"propertyName": "listA",
"Values": "x"
},
{
"propertyName": "listB",
"Values": ["a", "b", "c"]
},
{
"propertyName": "listC",
"min": 1,
"max": 3
}
]
How can I avoid specific one-element vectors to be unboxed during the toJSON conversion?
EDIT: cwthom kindly resolved it. Just change c("x") to list("x"). It works for lists with multiple items as well, and only add some additional new lines, which appear to be cosmetics only and did not have any negative impact on the end result on my end.

Is there an R function for getting the values that are on the brackets in a JSON file?

I'm trying to get some data in a JSON file using R, but it does not work when the data is under brackets and keys, I'm getting a lot of data, the problem is actually getting the value of the "released" parameter. example:
{
"index": [
{
"id": "a979eb2b85d6c13086b29a21bdc421b2673379a4",
"date": "2019-03-22T01:20:01-0300",
"status": "OK",
"sensor": [
{
"id": "15",
"number": 127,
"callback": {
"released": true #it is not possible to return this data
}
}
]
},
{
"id": "db2890f501a3a49ed74aeb065168e057c3fd51d2",
"date": "2019-03-25T01:20:01-0300",
"status": "NOK",
"sensor": [
{
"id": "15",
"number": 149,
"callback": {
"released": false #it is not possible to return this data
}
}
]
}
]
}
Follow the code:
library(jsonlite)
# Read the JSON file into R objects.
data <- fromJSON("Desktop/json/file.json")
pagination <- list()
# NOTE(review): the loop body does not depend on i, so every iteration
# stores the same expression result.
# NOTE(review): presumably `sensor` is parsed as a list with one entry per
# index element, in which case `$callback` on it is NULL and nothing is
# stored. Verify with str(data$index$sensor).
for(i in 0:10){
pagination[[i+1]] <- data$index$sensor$callback
}
# Combine the collected pieces into a single data frame.
data_org <- rbind_pages(pagination)
nrow(data_org)
# NOTE(review): this variable shares its name with base::length, and
# 1:length yields c(1, 0) when the data frame has zero rows.
length <- nrow(data_org)
data_org[1:length, c("released")]
The response was being:
nrow(data_org)
# [1] 0
data_org[1:length, c("released")]
# NULL

JQ Cross reference or how to replace one value with another part of the input

I want to parse terraform.tfstate (where openstack provider is used), to return instance name and it's internal + floating IP (if assigned).
First select what we are interested in:
jq -r '.modules?[]|.resources[]?|select(.type == "openstack_compute_floatingip_v2", .type == "openstack_compute_instance_v2")' < terraform.tfstate
For simplicity, pre-parsed example with the above part (one FIP and one instance):
{
"type": "openstack_compute_floatingip_v2",
"depends_on": [
"openstack_networking_router_interface_v2.management"
],
"primary": {
"id": "48b039fc-a9fa-4672-934a-32d6d267f280",
"attributes": {
"address": "209.66.89.143",
"fixed_ip": "10.10.10.5",
"id": "48b039fc-a9fa-4672-934a-32d6d267f280",
"instance_id": "597e75e8-834d-4f05-8408-e2e6e733577e",
"pool": "public",
"region": "RegionOne"
},
"meta": {},
"tainted": false
},
"deposed": [],
"provider": "provider.openstack"
}
{
"type": "openstack_compute_instance_v2",
"depends_on": [
"openstack_compute_floatingip_v2.management",
"openstack_compute_secgroup_v2.ssh_only",
"openstack_networking_network_v2.management"
],
"primary": {
"id": "597e75e8-834d-4f05-8408-e2e6e733577e",
"attributes": {
"access_ip_v4": "10.10.10.5",
"access_ip_v6": "",
"all_metadata.%": "1",
"all_metadata.habitat": "sup",
"availability_zone": "nova",
"flavor_id": "eb36e84e-17c1-42ab-b359-4380f6f524ae",
"flavor_name": "m1.large",
"force_delete": "false",
"id": "597e75e8-834d-4f05-8408-e2e6e733577e",
"image_id": "c574aeed-e47c-4fb7-9da0-75550b76ee56",
"image_name": "ubuntu-16.04",
"key_pair": "vault-etcd_test_tf",
"metadata.%": "1",
"metadata.habitat": "sup",
"name": "ctl01",
"network.#": "1",
"network.0.access_network": "false",
"network.0.fixed_ip_v4": "10.10.10.5",
"network.0.fixed_ip_v6": "",
"network.0.floating_ip": "",
"network.0.mac": "02:c6:61:f9:ee:7e",
"network.0.name": "management",
"network.0.port": "",
"network.0.uuid": "f2468669-e321-4eb4-9ede-003e362a8988",
"region": "RegionOne",
"security_groups.#": "1",
"security_groups.1845949017": "vault-etcd_test_ssh_only",
"stop_before_destroy": "false"
},
"meta": {
"e2bfb730-ecaa-11e6-8f88-34363bc7c4c0": {
"create": 1800000000000,
"delete": 1800000000000,
"update": 1800000000000
}
},
"tainted": false
},
"deposed": [],
"provider": "provider.openstack"
}
Required is to take from "type": "openstack_compute_floatingip_v2" replace .primary.attributes.address and .fixed_ip and from corresponding .instance_id the .name.
So, sth like:
{"address": "209.66.89.143",
"fixed_ip": "10.10.10.5",
"name": "ctl01"}
I came up with an idea using walk, but I am missing how to actually assign the proper value from the corresponding instance id:
jq -r "$(cat floating.jq)" terraform.tfstate
floating.jq:
# walk(f): apply the filter f to every value in the input, recursively.
# Objects are rebuilt key by key with each value walked first, then f is
# applied to the rebuilt object. Arrays have each element walked, then f is
# applied to the array. Any other value is passed to f directly.
def walk(f):
. as $in
| if type == "object" then
reduce keys[] as $key
( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
elif type == "array" then map( walk(f) ) | f
else f
end;
.modules?[]|.resources[]?|select(.type ==
"openstack_compute_floatingip_v2", .type ==
"openstack_compute_instance_v2")|
.primary|walk( if type == "object" and .attributes.address then
.attributes.instance_id |= "REFERRED VALUE HERE") else . end)
Let's assume the two related objects are in a file named two.json. Then one way to merge the information from both objects is using the -s command-line option, e.g.
jq -s '
(.[0].primary.attributes | {address, fixed_ip})
+ {name: .[1].primary.attributes.name}' two.json
Output
With your example input, the output would be:
{
"address": "209.66.89.143",
"fixed_ip": "10.10.10.5",
"name": "ctl01"
}

Resources