Combine multiple json to single json using jq - jq

I am new to jq and have been stuck on this problem for a while. Any help is appreciated.
I have two json files,
In file1.json:
{
"version": 4,
"group1": [
{
"name":"olditem1",
"content": "old content"
}
],
"group2": [
{
"name":"olditem2"
}
]
}
And in file2.json:
{
"group1": [
{
"name" : "newitem1"
},
{
"name":"olditem1",
"content": "new content"
}
],
"group2": [
{
"name" : "newitem2"
}
]
}
Expected result is:
{
"version": 4,
"group1": [
{
"name":"olditem1",
"content": "old content"
},
{
"name" : "newitem1"
}
],
"group2": [
{
"name":"olditem2"
},
{
"name" : "newitem2"
}
]
}
Criteria for the merge:
Has to merge only group1 and group2
Match only by name
I have tried
jq -S '.group1+=.group1|.group1|unique_by(.name)' file1.json file2.json
but this outputs only the filtered group1, and all other information is lost.

This approach uses INDEX to create a dictionary of unique elements based on their .name field, reduce to iterate over the group fields to be considered, and an initial state created by combining the slurped (-s) input files using add after removing the group fields to be processed separately using del.
jq -s '
[ "group1", "group2" ] as $gs | . as $in | reduce $gs[] as $g (
map(del(.[$gs[]])) | add; .[$g] = [INDEX($in[][$g][]; .name)[]]
)
' file1.json file2.json
{
"version": 4,
"group1": [
{
"name": "olditem1",
"content": "new content"
},
{
"name": "newitem1"
}
],
"group2": [
{
"name": "olditem2"
},
{
"name": "newitem2"
}
]
}
Demo

Related

JQ: Delete duplicate entry inplace

I am trying to delete a key whose value is duplicated elsewhere. That is, I would like to delete all occurrences (duplicates) after the first occurrence. Here is a sample json file I am working with
{
"clouds":{
"finfolk-vmaas":{
"auth-types":[
"oauth1"
],
"endpoint":"http://10.125.0.10:5240/MAAS/",
"type":"maas"
},
"vsphere":{
"auth-types":[
"userpass"
],
"endpoint":"10.247.0.3",
"regions":{
"QA":{
"endpoint":"10.247.0.3"
}
},
"type":"vsphere"
}
}
}
I would like to get this after the deletion:
{
"clouds":{
"finfolk-vmaas":{
"auth-types":[
"oauth1"
],
"endpoint":"http://10.125.0.10:5240/MAAS/",
"type":"maas"
},
"vsphere":{
"auth-types":[
"userpass"
],
"endpoint":"10.247.0.3",
"regions":{
"QA":{}
},
"type":"vsphere"
}
}
}
Essentially I want to remove this duplicate key/value pair "endpoint":"10.247.0.3" and leave the enclosing braces {}
Here is a simple jq query that I am trying to play with:
jq -cs 'unique_by(.endpoint)' clouds.json
For each object in .clouds[], this saves the object reduced to its endpoint as $endpoint, then recursively traverses all child objects; from each child object that contains the previously stored endpoint, (only) the endpoint field is deleted.
.clouds[] |= ({endpoint} as $endpoint | .[] |= walk(
(objects | select(contains($endpoint))) |= del(.endpoint)
))
{
"clouds": {
"finfolk-vmaas": {
"auth-types": [
"oauth1"
],
"endpoint": "http://10.125.0.10:5240/MAAS/",
"type": "maas"
},
"vsphere": {
"auth-types": [
"userpass"
],
"endpoint": "10.247.0.3",
"regions": {
"QA": {}
},
"type": "vsphere"
}
}
}
Demo

jq error cannot iterate over null but need to make a different choice

I have a jq filter that selects the rows I need. But sometimes these lines can be empty, and then everything breaks and the rule does not work. I tried to use the if-then-else construct but to no avail.
A rule that works if you process the following json:
.metadata.namespace as $ns | (.spec.rules[0].match.any[].resources.kinds[] / "/") | [select(.[1])[0] // null, select(.[2])[1] // null, last] as [$version,$group,$kind] | {namespace: $ns, kind: $kind, group: $version, version: $group} | with_entries(select(.value!=null))
suitable json:
{
"apiVersion": "kyverno.io/v1",
"kind": "posdfsdf",
"metadata": {
"name": "e-eion",
"namespace": "kke",
"annotations": {
"policies.kyverno.io/title": "Dation",
"policies.kyverno.io/category": "Pod Security Standards (Restricted)",
"policies.kyverno.io/severity": "medium",
"policies.kyverno.io/subject": "Pod",
"kyverno.io/kyverno-version": "1.6.0",
"kyverno.io/kubernetes-version": "1.22-1.23",
"policies.kyverno.io/description": "se`. "
}
},
"spec": {
"validationFailureAction": "audit",
"background": true,
"rules": [
{
"name": "tion",
"match": {
"any": [
{
"resources": {
"kinds": [
"Pod"
]
}
}
]
},
"validate": {
"message": "Prisd",
"pattern": {
"spec": {
"=(eners)": [
{
"secxt": {
"altion": "false"
}
}
],
"=(i)": [
{
"sext": {
"alcalation": "false"
}
}
],
"containers": [
{
"setext": {
"an": "false"
}
}
]
}
}
}
}
]
}
}
example on which the rule stops working:
{
"apiVersion": "k/v1",
"kind": "Picy",
"metadata": {
"name": "denylation",
"namespace": "what",
},
"spec": {
"validationFailureAction": "audit",
"background": true,
"rules": [
{
"name": "deny-privilege-escalation",
"match": {
"resources": {
"kinds": [
"Pod"
]
}
},
"validate": {
"message": "Priviles[*].securityContext.allowPrind spec.initContalse`.",
"pattern": {
"spec": {
"=(iners)": [
{
"=(seext)": {
"=(aln)": "false"
}
}
],
"containers": [
{
"=(stext)": {
"=(al)": "false"
}
}
]
}
}
}
}
]
}
}
How can this be fixed? I need the rule to work in all cases and produce output.
This should work :
.metadata.namespace as $ns |
((.spec.rules[0].match | .. | (objects | .resources.kinds[]?)) / "/") |
[select(.[1])[0] // null, select(.[2])[1] // null, last] as [$version,$group,$kind] |
{namespace: $ns, kind: $kind, group: $version, version: $group} |
with_entries(select(.value!=null))
The failing json gives the following error:
parse error: Expected another key-value pair at line 7, column 3
This is caused by the trailing comma found here:
"metadata": {
"name": "denylation",
"namespace": "what",
},
Removing that comma, the following error is thrown:
jq: error (at :44): Cannot iterate over null (null)
This is caused by the missing any key inside the match object.
We can 'catch' that error using a ? (docs):
(.spec.rules[0].match.any[]?.resources.kinds[] / "/")
^
But due to the missing key, the filter does not find anything and the output is empty.
Updated jqPlay

JQ filter specific item based on inner item

Having the following Array
[
[
{ "field" : { "name": "appname" }, "value": { "value" : "app1" } },
{ "field" : { "name": "appstat" }, "value": { "value" : "UP" } }
],
[
{ "field" : { "name": "appname" }, "value": { "value" : "app2" } },
{ "field" : { "name": "appstat" }, "value": { "value" : "DOWN" } }
],
[
{ "field" : { "name": "appname" }, "value": { "value" : "app3" } },
{ "field" : { "name": "appstat" }, "value": { "value" : "READY"} }
]
]
I want to be able to select on specific items based on the appname.
So I can do, for example,
jq .[] app3
response should be READY
This should bring you there
jq -r --arg q "app3" '
.[]
| select(.[] | .field.name == "appname" and .value.value == $q)
| .[]
| select(.field.name == "appstat").value.value
'
READY
Demo
However, your data structure seems rather complicated. You'd be better off (at least for this use case) with a simpler array of objects to lookup key-value pairs. For example, transform your input like so:
jq 'map(map({(first(.field.name)): first(.value.value)}) | add)'
[
{
"appname": "app1",
"appstat": "UP"
},
{
"appname": "app2",
"appstat": "DOWN"
},
{
"appname": "app3",
"appstat": "READY"
}
]
Demo
That way, your lookup would be as simple as
jq -r --arg q "app3" '.[] | select(.appname == $q).appstat'
READY
Demo

jq - extract multiple fields from a list, with a nested list of key/value pairs

I have the following structure:
{
"Subnets": [
{
"SubnetId": "foo1",
"Id": "bar1",
"Tags": [
{
"Key": "Name",
"Value": "foo"
},
{
"Key": "Status",
"Value": "dev"
}
]
},
{
"SubnetId": "foo2",
"Id": "bar2",
"Tags": [
{
"Key": "Name",
"Value": "foo"
},
{
"Key": "Status",
"Value": "dev"
}
]
}
]
}
I can extract multiple keys at the "top level" like so:
cat subnets.json| jq '.Subnets[] | "\(.Id) \(.SubnetId)"'
Does anyone know how I can also display one of the tags by key name? Let's say I also want the Status tag displayed on the same line as the Id and SubnetId.
Thx for any help,
Is this what you are looking for?
jq '.Subnets[] | "\(.Id) \(.SubnetId) \(.Tags | from_entries | .Status)"' subnets.json

Elasticsearch PHP longest prefix match

I am currently using the FOSElasticaBundle in Symfony2 and I am having a hard time trying to build a search to match the longest prefix.
I am aware of the 100 examples that are on the Internet to perform autocomplete-like searches using this. However, my problem is a little different.
In an autocomplete type of search the database holds the longest alphanumeric string (in length of characters) and the user just provides the shortest portion, let's say the user types "jho" and Elasticsearch can easily provide "Jhon, Jhonny, Jhonas".
My problem is backwards, I would like to provide the longest alphanumeric string and I want Elasticsearch to provide me the biggest match in the database.
For example: I could provide "123456789" and my database can have [12,123,14,156,16,7,1234,1,67,8,9,123456,0], in this case the longest prefix match in the database for the number that the user provided is "123456".
I am just starting with Elasticsearch, so I don't really have anything close to working settings yet.
If there is any information not clear or missing let me know and I will provide more details.
Update 1 (Using Val's 2nd Update)
Index: Download 1800+ indexes
Settings:
curl -XPUT localhost:9200/tests -d '{
"settings": {
"analysis": {
"analyzer": {
"edge_ngram_analyzer": {
"tokenizer": "edge_ngram_tokenizer",
"filter": [ "lowercase" ]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "2",
"max_gram": "25"
}
}
}
},
"mappings": {
"test": {
"properties": {
"my_string": {
"type": "string",
"fields": {
"prefix": {
"type": "string",
"analyzer": "edge_ngram_analyzer"
}
}
}
}
}
}
}'
Query:
curl -XPOST localhost:9200/tests/test/_search?pretty=true -d '{
"size": 1,
"sort": {
"_script": {
"script": "doc.my_string.value.length()",
"type": "number",
"order": "desc"
},
"_score": "desc"
},
"query": {
"filtered": {
"query": {
"match": {
"my_string.prefix": "8092232423"
}
},
"filter": {
"script": {
"script": "doc.my_string.value.length() <= maxlength",
"params": {
"maxlength": 10
}
}
}
}
}
}'
With this configuration the query returns the following results:
{
"took" : 61,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1754,
"max_score" : null,
"hits" : [ {
"_index" : "tests",
"_type" : "test",
"_id" : "AU8LqQo4FbTZPxBtq3-Q",
"_score" : 0.13441172,
"_source":{"my_string":"80928870"},
"sort" : [ 8.0, 0.13441172 ]
} ]
}
}
Bonus question
I would like to provide an array of numbers for that search and get the matching prefix for each one in an efficient way without having to perform the query each time
Here is my take at it.
Basically, what we need to do is to slice and dice the field (called my_string below) at indexing time with an edgeNGram tokenizer (called edge_ngram_tokenizer below). That way a string like 123456789 will be tokenized to 12, 123, 1234, 12345, 123456, 1234567, 12345678, 123456789 and all tokens will be indexed and searchable.
So let's create a tests index, a custom analyzer called edge_ngram_analyzer and a test mapping containing a single string field called my_string. You'll note that the my_string field is a multi-field declaring a prefixes sub-field which will contain all the tokenized prefixes.
curl -XPUT localhost:9200/tests -d '{
"settings": {
"analysis": {
"analyzer": {
"edge_ngram_analyzer": {
"tokenizer": "edge_ngram_tokenizer",
"filter": [ "lowercase" ]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "2",
"max_gram": "25"
}
}
}
},
"mappings": {
"test": {
"properties": {
"my_string": {
"type": "string",
"fields": {
"prefixes": {
"type": "string",
"index_analyzer": "edge_ngram_analyzer"
}
}
}
}
}
}
}
Then let's index a few test documents using the _bulk API:
curl -XPOST localhost:9200/tests/test/_bulk -d '
{"index":{}}
{"my_string":"12"}
{"index":{}}
{"my_string":"1234"}
{"index":{}}
{"my_string":"1234567890"}
{"index":{}}
{"my_string":"abcd"}
{"index":{}}
{"my_string":"abcdefgh"}
{"index":{}}
{"my_string":"123456789abcd"}
{"index":{}}
{"my_string":"abcd123456789"}
'
The thing that I found particularly tricky was that the matching result could be either longer or shorter than the input string. To achieve that we have to combine two queries, one looking for shorter matches and another for longer matches. So the match query will find documents with shorter "prefixes" matching the input and the query_string query (with the edge_ngram_analyzer applied on the input string!) will search for "prefixes" longer than the input string. Both enclosed in a bool/should and sorted by a decreasing string length (i.e. longest first) will do the trick.
Let's do some queries and see what unfolds:
This query will return the one document with the longest match for "123456789", i.e. "123456789abcd". In this case, the result is longer than the input.
curl -XPOST localhost:9200/tests/test/_search -d '{
"size": 1,
"sort": {
"_script": {
"script": "doc.my_string.value.length()",
"type": "number",
"order": "desc"
}
},
"query": {
"bool": {
"should": [
{
"match": {
"my_string.prefixes": "123456789"
}
},
{
"query_string": {
"query": "123456789",
"default_field": "my_string.prefixes",
"analyzer": "edge_ngram_analyzer"
}
}
]
}
}
}'
The second query will return the one document with the longest match for "123456789abcdef", i.e. "123456789abcd". In this case, the result is shorter than the input.
curl -XPOST localhost:9200/tests/test/_search -d '{
"size": 1,
"sort": {
"_script": {
"script": "doc.my_string.value.length()",
"type": "number",
"order": "desc"
}
},
"query": {
"bool": {
"should": [
{
"match": {
"my_string.prefixes": "123456789abcdef"
}
},
{
"query_string": {
"query": "123456789abcdef",
"default_field": "my_string.prefixes",
"analyzer": "edge_ngram_analyzer"
}
}
]
}
}
}'
I hope that covers it. Let me know if not.
As for your bonus question, I'd simply suggest using the _msearch API and sending all queries at once.
UPDATE: Finally, make sure that scripting is enabled in your elasticsearch.yml file using the following:
# if you have ES <1.6
script.disable_dynamic: false
# if you have ES >=1.6
script.inline: on
UPDATE 2 I'm leaving the above as the use case might fit someone else's needs. Now, since you only need "shorter" prefixes (makes sense !!), we need to change the mapping a little bit and the query.
The mapping would be like this:
{
"settings": {
"analysis": {
"analyzer": {
"edge_ngram_analyzer": {
"tokenizer": "edge_ngram_tokenizer",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "2",
"max_gram": "25"
}
}
}
},
"mappings": {
"test": {
"properties": {
"my_string": {
"type": "string",
"fields": {
"prefixes": {
"type": "string",
"analyzer": "edge_ngram_analyzer" <--- only change
}
}
}
}
}
}
}
The query is now a bit different, and it will always return only the longest prefix that is shorter than or equal in length to the input string. Please try it out. I advise re-indexing your data to make sure everything is set up properly.
{
"size": 1,
"sort": {
"_script": {
"script": "doc.my_string.value.length()",
"type": "number",
"order": "desc"
},
"_score": "desc" <----- also add this line
},
"query": {
"filtered": {
"query": {
"match": {
"my_string.prefixes": "123" <--- input string
}
},
"filter": {
"script": {
"script": "doc.my_string.value.length() <= maxlength",
"params": {
"maxlength": 3 <---- this needs to be set to the length of the input string
}
}
}
}
}
}

Resources