Gremlin filter by count - azure-cosmosdb

Using this query with the Cosmos DB Gremlin API:
g.V().has('person', 'name', 'John').as('his')
.out('bought').aggregate('self')
.out('made_by')
I get the following output:
[
{
"id": "100",
"label": "brand",
"type": "vertex",
"properties": {
"name": [
{
"id": "233b77e7-7007-4c08-8930-99b25b67e493",
"value": "Apple"
}
]
}
},
{
"id": "100",
"label": "brand",
"type": "vertex",
"properties": {
"name": [
{
"id": "233b77e7-7007-4c08-8930-99b25b67e493",
"value": "Apple"
}
]
}
},
{
"id": "101",
"label": "brand",
"type": "vertex",
"properties": {
"name": [
{
"id": "f3e238e2-f274-489c-a69c-f1333403ee8e",
"value": "Google"
}
]
}
}
]
Is there a way to select only the brands that appear more than once in this output (Apple in this case)?

I think that you just need to groupCount() and then use a filter:
g.V().has('person', 'name', 'John').as('his').
out('bought').aggregate('self').
out('made_by').
groupCount().
unfold().
where(select(values).is(gt(1))).
select(keys)
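Here groupCount() builds a Map from each brand vertex to the number of times it was reached, unfold() turns that Map into individual entries so that where() can filter them by their count, and select(keys) returns the brand vertices that remain.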

Related

How to project values from a Gremlin traversal with nested and()/or() steps

I have the graph model below which represents the sub-pattern I'd like to traverse or fetch. The nodes and their properties are shown below as well.
The expected response to my query would look something like the following, where 's', 'c', 'aid', 'qid', 'p', 'r1' and 'r2' are the nodes that make up the sub-pattern (subgraph):
[
{
"s": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "severity",
"type": "vertex",
"properties": {
"severity": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "High"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"c": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "cve",
"type": "vertex",
"properties": {
"cve_id": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "CVE-xxxx-xxxx"
}
],
"publishedOn": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"aid": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "aid",
"type": "vertex",
"properties": {
"aid": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"qid": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "qid",
"type": "vertex",
"properties": {
"qid": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"p": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "package",
"type": "vertex",
"properties": {
"name": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxxx"
}
],
"version": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"r1": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "release",
"type": "vertex",
"properties": {
"source": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"status": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
},
"r2": {
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4",
"label": "release",
"type": "vertex",
"properties": {
"source": [
{
"id": "a6a9e38f-0802-48b6-ac37-490f45e824e9",
"value": "xxxx-xxxx"
}
],
"status": [
{
"id": "fc5dde4d-c027-4c19-9b16-b3314b2b10c6",
"value": "xxx"
}
],
"pk": [
{
"id": "345fbdad-9c67-47bb-9f3b-cf50c8cdbee4|pk",
"value": "pk"
}
]
}
}
},
{
....
....
},
{
....
..
}
]
My question is how do I build my traversal query to achieve this end result?
What I have so far is the following, but the project() step is not working as expected:
g.V().hasLabel('cve').as('c').and(
__.in('severity').as('s'),
__.out('cve_to_aid').as('aid').and(
__.out('has_qid').as('qid'),
__.in('package_to_aid').as('p'),
or(
__.in('r1_to_aid').has('status', 'Patched').as('r1'),
__.in('r2_to_aid').has('status', 'Patched').as('r2')
)
)
).project('c', 's', 'aid', 'qid', 'p', 'r1', 'r2').
by(('c').values('cve_id')).
by(('s').values('severity')).
by(('aid').values('aid')).
by(('qid').values('qid')).
by(('p').values()).
by(('r1').values()).
by(('r2').values()).
I am doing this on CosmosDB, so please only provide answers using supported steps found here: https://learn.microsoft.com/en-us/azure/cosmos-db/gremlin/support
It is possible to nest project() steps, e.g. on the TinkerGraph:
gremlin> g = TinkerFactory.createModern().traversal()
==>graphtraversalsource[tinkergraph[vertices:6 edges:6], standard]
gremlin> g.V(1).as('x').project('x').by(
select('x').project('id', 'label','properties').by(id).by(label).by(
project('name').by(properties())
)
)
==>[x:[id:1,label:person,properties:[name:vp[name->marko]]]]
gremlin>
However, you then end up coding your entire data model into your query.
In full TinkerPop you could turn your result into a subgraph with the subgraph() step and write it to GraphSON with the io() step. In Cosmos DB you can add the returned vertices to a TinkerGraph instance on the client side and again use the io() step to serialize the TinkerGraph to GraphSON.

Merge all objects inside an array that share the same key

I'm trying to deduplicate all objects inside the array results that share the same key id, and merge their path arrays.
JSON input:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/a"
},
{
"id": "apple1",
"name": "appleName1",
"path": "/some/path/b"
},
{
"id": "apple2",
"name": "appleName2",
"path": "/some/path/c"
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/a"
},
{
"id": "orange1",
"name": "orangeName1",
"path": "/some/path/b"
},
{
"id": "orange2",
"name": "orangeName2",
"path": "/some/path/c"
}
]
}
]
Expected output:
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
I've managed to get an approximate solution using:
jq '[{type: .[].type, results: .[].results | group_by(.id) | map({id: .[0].id, name: .[0].name, path: (map(.path))})}]'
But my solution produces two additional elements that aren't supposed to be there.
I know there are some similar questions already answered but I didn't manage to get them to work with this example. Any help is appreciated!
You could group_by the .id field, then for each group take the first item and replace its .path field with a map on the .path fields of all group members:
jq 'map(.results |= (group_by(.id) | map(first + {path: map(.path)})))'
[
{
"type": "apple",
"results": [
{
"id": "apple1",
"name": "appleName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "apple2",
"name": "appleName2",
"path": [
"/some/path/c"
]
}
]
},
{
"type": "orange",
"results": [
{
"id": "orange1",
"name": "orangeName1",
"path": [
"/some/path/a",
"/some/path/b"
]
},
{
"id": "orange2",
"name": "orangeName2",
"path": [
"/some/path/c"
]
}
]
}
]
Demo

Need help parsing json output with jq for a complex json

For the JSON below, I need to use jq to output the id and name of each entry in results that has
authorization.roles[].name == "Supervisor"
What is the jq command to do that? For the JSON below, we expect only id 1231 and name AAAA as output, because that is the only entry that has Supervisor as a role.
{
"results": [{
"id": "1231",
"name": "AAAA",
"div": {
"id": "AAA",
"name": "DDSAA",
"selfUri": ""
},
"chat": {
"jabberId": "nn"
},
"department": "Shared Services Organization",
"email": "Test#gmail.com",
"primaryContactInfo": [{
"address": "Test#gmail.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}],
"addresses": [],
"state": "active",
"title": "AAA",
"username": "Test#gmail.com",
"version": 27,
"authorization": {
"roles": [{
"id": "01256689-c5ed-43a5-b370-58522402830d",
"name": "AA"
}, {
"id": "1e65b009-9f8f-4eef-9844-83944002c095",
"name": "BBB"
}, {
"id": "8a19f1ff-40e5-45d2-b758-14550a173323",
"name": "CCC"
}, {
"id": "d02250e2-7071-46bf-885b-43edff2d88a6",
"name": "Supervisor"
}]
}
}, {
"id": "1255",
"name": "BBBB",
"div": {
"id": "AAA",
"name": "DDSAA",
"selfUri": ""
},
"chat": {
"jabberId": "nn"
},
"department": "Shared Services Organization",
"email": "Test#gmail.com",
"primaryContactInfo": [{
"address": "Test#gmail.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}],
"addresses": [],
"state": "active",
"title": "AAA",
"username": "Test#gmail.com",
"version": 27,
"authorization": {
"roles": [{
"id": "01256689-c5ed-43a5-b370-58522402830d",
"name": "AA"
}, {
"id": "1e65b009-9f8f-4eef-9844-83944002c095",
"name": "BBB"
}, {
"id": "8a19f1ff-40e5-45d2-b758-14550a173323",
"name": "CCC"
}, {
"id": "d02250e2-7071-46bf-885b-43edff2d88a6",
"name": "Tester"
}]
}
}]
}
Don't put commas before closing brackets or curly braces (it's not valid JSON). Your input should look like this:
{
"results": [
{
"id": "1231",
"name": "AAAA",
"div": {
"id": "AAA",
"name": "DDSAA",
"selfUri": ""
},
"chat": {
"jabberId": "nn"
},
"department": "Shared Services Organization",
"email": "Test#gmail.com",
"primaryContactInfo": [
{
"address": "Test#gmail.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}
],
"addresses": [],
"state": "active",
"title": "AAA",
"username": "Test#gmail.com",
"version": 27,
"authorization": {
"roles": [
{
"id": "01256689-c5ed-43a5-b370-58522402830d",
"name": "AA"
},
{
"id": "1e65b009-9f8f-4eef-9844-83944002c095",
"name": "BBB"
},
{
"id": "8a19f1ff-40e5-45d2-b758-14550a173323",
"name": "CCC"
},
{
"id": "d02250e2-7071-46bf-885b-43edff2d88a6",
"name": "Supervisor"
}
]
}
},
{
"id": "1255",
"name": "BBBB",
"div": {
"id": "AAA",
"name": "DDSAA",
"selfUri": ""
},
"chat": {
"jabberId": "nn"
},
"department": "Shared Services Organization",
"email": "Test#gmail.com",
"primaryContactInfo": [
{
"address": "Test#gmail.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}
],
"addresses": [],
"state": "active",
"title": "AAA",
"username": "Test#gmail.com",
"version": 27,
"authorization": {
"roles": [
{
"id": "01256689-c5ed-43a5-b370-58522402830d",
"name": "AA"
},
{
"id": "1e65b009-9f8f-4eef-9844-83944002c095",
"name": "BBB"
},
{
"id": "8a19f1ff-40e5-45d2-b758-14550a173323",
"name": "CCC"
},
{
"id": "d02250e2-7071-46bf-885b-43edff2d88a6",
"name": "Tester"
}
]
}
}
]
}
Then, you can use select to narrow down your target objects (here using any to check whether at least one of the role names matches your string -- thanks to @ikegami), and then output any part of the resulting object(s):
jq '
.results[]
| select(any(.authorization.roles[]; .name == "Supervisor"))
| {id, name}
'
{
"id": "1231",
"name": "AAAA"
}
Demo
If instead of a JSON output you need raw text, use the -r (or --raw-output) flag, and provide the fields you are interested in:
jq -r '
.results[]
| select(any(.authorization.roles[]; .name == "Supervisor"))
| .id, .name
'
1231
AAAA
Demo

Get the value after group by in gremlin?

g.V('JobDefinition1').out("JobDefinitionToJobHistory").has("Timestamp", between("2022-02-01T00:00:00Z", "2022-02-03T00:00:00Z")).group().by("ttl").by(limit(1))
I have the Gremlin query above and get the result below.
[
{
"776": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM",
"label": "JobHistory",
"type": "vertex",
"properties": {
"Timestamp": [
{
"id": "6d187ccf-160d-4d87-a360-48526b7a1461",
"value": "2022-02-01T00:00:00Z"
}
],
"ttl": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM|ttl",
"value": "776"
}
]
}
}
],
"888": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM",
"label": "JobHistory",
"type": "vertex",
"properties": {
"Timestamp": [
{
"id": "6d187ccf-160d-4d87-a360-48526b7a1461",
"value": "2022-02-01T00:00:00Z"
}
],
"ttl": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM|ttl",
"value": "888"
}
]
}
}
]
}
]
But I only want the values of the group-by result; the expected result is shown below. I want the grouped values without the keys (as you can see, the expected result does not contain key information such as "776" and "888"). Is there a Gremlin step that can help me achieve this? I hope you can give me some help. Thanks!
[
{
"id": "JobHistory-2-1-2022 12:19:15 AM",
"label": "JobHistory",
"type": "vertex",
"properties": {
"Timestamp": [
{
"id": "6d187ccf-160d-4d87-a360-48526b7a1461",
"value": "2022-02-01T00:00:00Z"
}
],
"ttl": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM|ttl",
"value": "776"
}
]
}
}
,
{
"id": "JobHistory-2-1-2022 12:19:15 AM",
"label": "JobHistory",
"type": "vertex",
"properties": {
"Timestamp": [
{
"id": "6d187ccf-160d-4d87-a360-48526b7a1461",
"value": "2022-02-01T00:00:00Z"
}
],
"ttl": [
{
"id": "JobHistory-2-1-2022 12:19:15 AM|ttl",
"value": "888"
}
]
}
}
]
You can get values from a Map with select(values):
gremlin> g.V().groupCount().by(label)
==>[software:2,person:4]
gremlin> g.V().groupCount().by(label).select(values)
==>[2,4]

JSON path how to get value of #

I have the following JSON:
{
"code": 201,
"data": {
"type": "Agent",
"id": {
"#type": "Client",
"#id": "88",
"title": "Ing.",
"titleAfter": null,
"name": "Ján",
"surname": "Kašperan",
"idNumber": "8706229411",
"dateOfBirth": null,
"idCardNumber": "OP12345",
"idCardExpirationDate": null,
"idCardType": {
"#type": "IdCardType",
"#id": 1,
"name": "id_card_type.id_card",
"shortName": "OP"
},
"type": 1
}
}
}
I would like to write a JSONPath expression that returns the value of "#id": "88".
My JSONPath expression does not work: $.data.id.#id
Since # is not allowed in dot notation, access the property with bracket notation instead, using its name as a string. More information about property accessors can be found on MDN.
var json = {
"code": 201,
"data": {
"type": "Agent",
"id": {
"#type": "Client",
"#id": "88",
"title": "Ing.",
"titleAfter": null,
"name": "Ján",
"surname": "Kašperan",
"idNumber": "8706229411",
"dateOfBirth": null,
"idCardNumber": "OP12345",
"idCardExpirationDate": null,
"idCardType": {
"#type": "IdCardType",
"#id": 1,
"name": "id_card_type.id_card",
"shortName": "OP"
},
"type": 1
}
}
}
console.log(json.data.id["#id"])

Resources