CRUD support for list of Dicts - weaviate

My goal is to add Weaviate support to the pyLodStorage project
Specifically I'd like to use the sample data from:
https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py
Which has
a few records of Persons from the Royal family
a city list with a few thousand entries
an artificial list of records with as many records as you wish
as examples.
All data is tabular. Some basic python types like:
str
bool
int
float
date
datetime
need to be supported.
I created the project http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest and a script to run Weaviate via docker compose. There is a python unit test which used to work with the Weaviate Python client 0.4.1
I am trying to use the information from https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html to refactor this unit test but don't know how to do it.
What needs to be done to get the CRUD tests running as e.g. in the other three tests:
https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests
for
JSON
SPARQL
SQL
I am especially interested in the "round-trip" handling of a list of dicts (aka "Table") with the standard data types above. So I'd like to create a list of dicts and then:
derive the schema automatically by looking at some sample records
check if the schema already exists and, if so, delete it
create the schema
check if the data already exists and, if so, delete it
add the data and store it
optionally store the schema for further reference
restore the data with or without using the schema information
check that the restored data (list of Dicts) is the same as the original data
'''
Created on 2020-07-24
@author: wf
'''
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
    '''
    tests against a running Weaviate instance, written with the API of the
    Weaviate Python client 0.4.1

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        set the host and port of the Weaviate instance to test against
        '''
        self.port = 8153
        self.host = "localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the client - it is also remembered in self.client
        '''
        self.client = weaviate.Client("http://%s:%d" % (self.host, self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w = self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: keep going if the schema could not be created
            # (e.g. because a previous run already created it) but report
            # the reason instead of hiding it
            print("schema not created: %s" % ex)
        # each entry is: properties, class name, uuid
        entries = [
            [{"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [{"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [{"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6"],
        ]
        for properties, className, uid in entries:
            try:
                w.create(properties, className, uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                print("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # the test is disabled: everything below the return is not executed
        return
        w = self.getClient()
        schema = {
            "actions": {"classes": [], "type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        w.create_schema(schema)
        w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.create_thing({"name": "Alan Turing"}, "Person")
        w.create_thing({"name": "John von Neumann"}, "Person")
        w.create_thing({"name": "Tim Berners-Lee"}, "Person")

    def testEventSchema(self):
        '''
        https://stackoverflow.com/a/63077495/1497139
        '''
        # the test is disabled: everything below the return is not executed
        return
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": ["text"]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": ["City"],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": ["text"]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": ["Event"],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }
        client = self.getClient()
        if not client.contains_schema():
            client.create_schema(schema)
        event = {"acronym": "example"}
        client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
        time.sleep(2.0)
        client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
# run all tests of this module when it is started as a script
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()

The unit test for the connection, schema and data objects you show above works like this with the Python client v1.x (see the inline comments for what's changed):
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
    '''
    tests against a running Weaviate instance, migrated to the API of the
    Weaviate Python client v1.x - the "instead of" comments name the call
    that was used before

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        set the host and port of the Weaviate instance to test against
        '''
        self.port = 8153
        self.host = "localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the client - it is also remembered in self.client
        '''
        self.client = weaviate.Client("http://%s:%d" % (self.host, self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w = self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            # instead of w.create_schema, see https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html#creating-your-first-schema-with-the-python-client
            w.schema.create("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: keep going if the schema could not be created
            # (e.g. because a previous run already created it) but report
            # the reason instead of hiding it
            print("schema not created: %s" % ex)
        # each entry is: properties, class name, uuid
        entries = [
            [{"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [{"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [{"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6"],
        ]
        for properties, className, uid in entries:
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#example-request-1
                w.data_object.create(properties, className, uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                print("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # the test is disabled: everything below the return is not executed
        return
        w = self.getClient()
        schema = {
            "actions": {"classes": [], "type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema)
        # instead of w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Alan Turing"}, "Person")
        w.data_object.create({"name": "John von Neumann"}, "Person")
        w.data_object.create({"name": "Tim Berners-Lee"}, "Person")

    def testEventSchema(self):
        '''
        https://stackoverflow.com/a/63077495/1497139
        '''
        # the test is disabled: everything below the return is not executed
        return
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": ["text"]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": ["City"],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": ["text"]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": ["Event"],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }
        client = self.getClient()
        # instead of client.contains_schema() - the schema functions are
        # reached via client.schema, like schema.create below
        if not client.schema.contains():
            # instead of client.create_schema(schema)
            client.schema.create(schema)
        event = {"acronym": "example"}
        # instead of client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
        time.sleep(2.0)
        # instead of client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde"), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#add-a-cross-reference
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
# run all tests of this module when it is started as a script
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
There's no support for automatically deriving a schema from a list of dicts (or other formats) yet. This could, as you mention, be a good convenience feature, so we'll add it to Weaviate's feature suggestions!

The new version of Weaviate is now available (v1.2.1 is the latest release at the time of writing this). With this version a lot of things were removed and even more added. One of the major breaking changes is that actions and things were removed; objects were introduced instead. All the changes and features of Weaviate v1.2 can be used with the weaviate-client Python library v2.3.
Most of the current weaviate-client functionality is explained and demonstrated in this article.
Here are the same unit tests, but for Weaviate v1.2.1 and written using weaviate-client v2.3.1:
import unittest
import weaviate
import time
#import getpass
# schema with the two classes used by TestWeaviate.testWeaviateSchema:
# Person and Group, where a Group refers to its Persons via "members"
person_schema = {
    "classes": [
        {
            "class": "Person",
            "description": "A person such as humans or personality known through culture",
            "properties": [
                {
                    "name": "name",
                    "description": "The name of this person",
                    "dataType": ["text"],
                },
            ],
        },
        {
            "class": "Group",
            "description": "A set of persons who are associated with each other over some common properties",
            "properties": [
                {
                    "name": "name",
                    "description": "The name under which this group is known",
                    "dataType": ["text"],
                },
                {
                    # cross reference: the dataType is the name of the referenced class
                    "name": "members",
                    "description": "The persons that are part of this group",
                    "dataType": ["Person"],
                },
            ],
        },
    ],
}
class TestWeaviate(unittest.TestCase):
    '''
    tests against a running Weaviate v1.2.1 instance, written with
    weaviate-client v2.3.1 - the "instead of" comments name the call
    that was used with older clients

    NEW link to the page
    https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
    '''

    def setUp(self):
        '''
        set the host and port of the Weaviate instance to test against
        '''
        self.port = 8080
        self.host = "localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a client for the configured host and port

        Returns:
            the client - it is also remembered in self.client
        '''
        self.client = weaviate.Client("http://%s:%d" % (self.host, self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w = self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        '''
        create the person schema and add two persons and a group

        NEW link to the page
        https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
        '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        # it is a good idea to check if Weaviate has a schema already when testing,
        # otherwise creating the schema will result in an error;
        # this way you know for sure that your current schema is known to weaviate.
        if w.schema.contains():
            # delete the existing schema (removes all the data objects too)
            w.schema.delete_all()
        # instead of w.create_schema(person_schema)
        w.schema.create(person_schema)
        # each entry is: properties, class name, uuid
        entries = [
            [{"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [{"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [{"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6"],
        ]
        for properties, className, uid in entries:
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#create-a-data-object
                w.data_object.create(properties, className, uid)
            # ObjectAlreadyExistsException is the correct exception starting with weaviate-client 2.0.0
            except weaviate.exceptions.ObjectAlreadyExistsException:
                print("%s already created" % properties['name'])

    def testPersons(self):
        '''
        create a Person schema and a few persons
        '''
        # the test is disabled: everything below the return is not executed
        return
        w = self.getClient()
        schema = {
            #"actions": {"classes": [],"type": "action"}, `actions` and `things` were removed in weaviate v1.0 and in weaviate-client v2.0
            # Now there is only `objects`
            "classes": [
                {
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            #"cardinality": "atMostOne", was removed in weaviate v1.0 and weaviate-client v2.0
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }
            ]
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema)
        # instead of w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Alan Turing"}, "Person")
        w.data_object.create({"name": "John von Neumann"}, "Person")
        w.data_object.create({"name": "Tim Berners-Lee"}, "Person")

    def testEventSchema(self):
        '''
        https://stackoverflow.com/a/63077495/1497139
        '''
        # the test is disabled: everything below the return is not executed
        return
        schema = {
            # "things": { , was removed in weaviate v1.0 and weaviate-client v2.0
            # "type": "thing", was removed in weaviate v1.0 and weaviate-client v2.0
            "classes": [
                {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                        {
                            "name": "acronym",
                            "description": "acronym",
                            "dataType": ["text"]
                        },
                        {
                            "name": "inCity",
                            "description": "city reference",
                            "dataType": ["City"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                },
                {
                    "class": "City",
                    "description": "city",
                    "properties": [
                        {
                            "name": "name",
                            "description": "name",
                            "dataType": ["text"]
                        },
                        {
                            "name": "hasEvent",
                            "description": "event references",
                            "dataType": ["Event"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        }
                    ]
                }
            ]
        }
        client = self.getClient()
        # this test is going to fail if you are using the same Weaviate instance:
        # a schema was already created in testWeaviateSchema, so the new schema
        # is not going to be created and the attempt will result in an error.
        # we can delete the schema and create a new one.
        # instead of client.contains_schema()
        if client.schema.contains():
            # delete the existing schema (removes all the data objects too)
            client.schema.delete_all()
        # instead of client.create_schema(schema)
        client.schema.create(schema)
        event = {"acronym": "example"}
        # instead of client.create(...)
        client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
        time.sleep(2.0)
        # instead of client.add_reference(...), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#cross-references
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
# run all tests of this module when it is started as a script
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()

Related

Getting a specific item in a sub array and selecting one value from it

I want to get the boardgame rank (value) from this nested array in Cosmos DB.
{
"name": "Alpha",
"statistics": {
"numberOfUserRatingVotes": 4155,
"averageRating": 7.26201,
"baysianAverageRating": 6.71377,
"ratingStandardDeviation": 1.18993,
"ratingMedian": 0,
"rankings": [
{
"id": 1,
"name": "boardgame",
"friendlyName": "Board Game Rank",
"type": "subtype",
"value": 746
},
{
"id": 4664,
"name": "wargames",
"friendlyName": "War Game Rank",
"type": "family",
"value": 140
},
{
"id": 5497,
"name": "strategygames",
"friendlyName": "Strategy Game Rank",
"type": "family",
"value": 434
}
],
"numberOfComments": 1067,
"weight": 2.3386,
"numberOfWeightVotes": 127
},
}
So I want:
{
"name": "Alpha",
"rank": 746
}
Using this query:
SELECT g.name, r
FROM Games g
JOIN r IN g.statistics.rankings
WHERE r.name = 'boardgame'
I get this (so close!):
{
"name": "Alpha",
"r": {
"id": 1,
"name": "boardgame",
"friendlyName": "Board Game Rank",
"type": "subtype",
"value": 746
}
},
But extending the query to this:
SELECT g.name, r.value as rank
FROM Games g
JOIN r IN g.statistics.rankings
WHERE r.name = 'boardgame'
I get this error:
Failed to query item for container Games:
Message: {"errors":[{"severity":"Error","location":{"start":21,"end":26},"code":"SC1001","message":"Syntax error, incorrect syntax near 'value'."}]}
ActivityId: 0a0cb394-2fc3-4a67-b54c-4d02085b6878, Microsoft.Azure.Documents.Common/2.14.0
I don't understand why this doesn't work, or what the syntax error is. I tried adding square brackets, but that didn't help. Can someone help me understand why I get this error and how to achieve the output I'm looking for?
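`value` is a reserved keyword in the Cosmos DB SQL dialect, so the property has to be accessed with the quoted bracket notation instead of the dot notation. This should work: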
SELECT g.name, r["value"] as rank
FROM Games g
JOIN r IN g.statistics.rankings
WHERE r.name = 'boardgame'

Convert data to Json with all objects included

I want to convert a feature file to json so that I can pass it to a javascript function in an RMD file.
However, the toJSON function seems to flatten it and remove many of the fields and structures, as shown below. How can I convert it and keep it intact, as happens when I write it to a file using sf::st_write?
url <- 'https://opendata.arcgis.com/api/v3/datasets/bf9d32b1aa9941af84e6c2bf0c54b1bb_0/downloads/data?format=geojson&spatialRefId=4326'
ukWardShapes <- sf::st_read(url) %>%
head(2)
# Looks OK when written out
sf::st_write(ukWardShapes, "wardShapes.geojson")
# Converting to json with toJSON seems to drop the other top level fields (type, name, crs) and lists the objects within the features object,
# but without type, and puts all fields in properties at the top level of object.
json_data <- jsonlite::toJSON(ukWardShapes)
# I want to do this as I need to pass it to javascript within an RMD like this
htmltools::tags$script(paste0("var ukWardShapes = ", json_data, ";"))
# Output from st_write - with all the objects and fields listed properly
{
"type": "FeatureCollection",
"name": "wardShapes",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "OBJECTID": 1, "WD21CD": "E05000026", "WD21NM": "Abbey", "WD21NMW": " ", "BNG_E": 544433, "BNG_N": 184376, "LONG": 0.081276, "LAT": 51.53981, "SHAPE_Length": 0.071473941285613768, "SHAPE_Area": 0.00015225110241064838 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.093628520000038, 51.53767283600007 ], [ 0.08163128800004, 51.539165094000055 ], [ 0.085507102000065, 51.537043160000053 ], [ 0.075954208000041, 51.533595714000057 ], [ 0.07333983500007, 51.537621201000036 ], [ 0.068771363000053, 51.536206993000064 ], [ 0.068303699000069, 51.544253423000043 ], [ 0.068361695000021, 51.544390390000046 ], [ 0.08006389600007, 51.544772356000067 ], [ 0.093628520000038, 51.53767283600007 ] ] ] ] } },
{ "type": "Feature", "properties": { "OBJECTID": 2, "WD21CD": "E05000027", "WD21NM": "Alibon", "WD21NMW": " ", "BNG_E": 549247, "BNG_N": 185196, "LONG": 0.150987, "LAT": 51.545921, "SHAPE_Length": 0.074652046036690151, "SHAPE_Area": 0.00017418950412786572 }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 0.161601914000073, 51.543327754000074 ], [ 0.147931795000034, 51.541598449000048 ], [ 0.140256898000075, 51.54111542000004 ], [ 0.13420572800004, 51.540716652000071 ], [ 0.131925236000029, 51.543763455000033 ], [ 0.14633003900002, 51.546332889000041 ], [ 0.142816723000067, 51.550973604000035 ], [ 0.156378253000071, 51.551020271000027 ], [ 0.161601914000073, 51.543327754000074 ] ] ] ] } }
]
}
# Output from toJson which seems to have a lot of structure removed. Note, I'm not
# concerned about it being pretty and separated into lines
[{
"OBJECTID":1, "WD21CD":"E05000026", "WD21NM":"Abbey", "WD21NMW":" ", "BNG_E":544433, "BNG_N":184376, "LONG":0.0813, "LAT":51.5398, "SHAPE_Length":0.0715, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.0936, 51.5377], [0.0816, 51.5392], [0.0855, 51.537], [0.076, 51.5336], [0.0733, 51.5376], [0.0688, 51.5362], [0.0683, 51.5443], [0.0684, 51.5444], [0.0801, 51.5448], [0.0936, 51.5377]]]]
}
}, {
"OBJECTID":2, "WD21CD":"E05000027", "WD21NM":"Alibon", "WD21NMW":" ", "BNG_E":549247, "BNG_N":185196, "LONG":0.151, "LAT":51.5459, "SHAPE_Length":0.0747, "SHAPE_Area":0.0002, "geometry":{
"type":"MultiPolygon", "coordinates":[[[[0.1616, 51.5433], [0.1479, 51.5416], [0.1403, 51.5411], [0.1342, 51.5407], [0.1319, 51.5438], [0.1463, 51.5463], [0.1428, 51.551], [0.1564, 51.551], [0.1616, 51.5433]]]]
}
}]
As per @SymbolixAU's comment above, the answer is to use
geojsonsf::sf_geojson() instead of jsonlite::toJSON() as geojson is a specific structure of JSON for spatial data and it needs a specific parser for it.
So my line of code should be:
json_data <- geojsonsf::sf_geojson(ukWardShapes)

how can i get the these data array in ui reactnative

I need to display the data in the frontend.
I load my data into the lists[] state, and console.log(this.state.lists) prints the structure below in the console. How can I show this data in the frontend?
Array [
Object {
"list": Array [
Object {
"id": "123",
"imageUrl": "http://www.pngmart.com/files/1/Pizza-Slice-PNG-Transparent-Image.png",
"name": "Chicken Devill pizza",
"price": 700,
"size": "Medium",
},
],
"uid": "xQ0Kg4PgYwVGFTTPGsXK1WHlJuM2",
},
Object {
"list": Array [
Object {
"id": "1234",
"imageUrl": "http://www.pngmart.com/files/1/Cheese-Pizza.png",
"name": "Cheese pork pizza",
"price": 1500,
"size": "Medium",
},
],
"uid": "xQ0Kg4PgYwVGFTTPGsXK1WHlJuM2",
},
]
It seems the data format is wrong. However, if you want to extract the array info, you can do it like this:
// Collect the first item of every entry's `list` array into one flat array.
let finalArray = [];
for (let i = 0; i < response.length; i++) {
  finalArray.push(response[i].list[0]);
}
console.log(finalArray);

How to fix this Matchmaking Rule set for AWS Game Lift

I am new to Game Lift and am trying to make a ruleset for a Jeopardy game for a project I am creating. I try to apply what I want to do for the match making but I always seem to get this error and cannot figure out for the life of me what is wrong.
I am doing 3 players, each having near the same skill set so to keep it fair. Can someone explain what I am doing wrong?
I have already looked up all around the documentation of Game lift but I am still confused how this portion works. The examples they gave worked and I tried editing them to my own liking but it seems it did not work.
{
"name": "Normal_Game",
"ruleLanguageVersion": "1.0",
"playerAttributes": [{
"name": "skill",
"type": "number",
"default": 10
}],
"teams": [{
"name": "red",
"maxPlayers": 1,
"minPlayers": 1
}, {
"name": "blue",
"maxPlayers": 1,
"minPlayers": 1
},{
"name": "green",
"maxPlayers": 1,
"minPlayers":1
}],
"rules": [{
"name": "FairTeamSkill",
"description": "The average skill of players in each team is within 10 points from the average skill of all players in the match",
"type": "distance",
// get skill values for players in each team and average separately to produce list of two numbers
"measurements": [ "avg(teams[*].players.attributes[skill])" ],
// get skill values for players in each team, flatten into a single list, and average to produce an overall average
"referenceValue": "avg(flatten(teams[*].players.attributes[skill]))",
"maxDistance": 10 // minDistance would achieve the opposite result
}, {
"name": "EqualTeamSizes",
"description": "Only launch a game when the number of players in each team matches, e.g. 4v4, 5v5, 6v6, 7v7, 8v8",
"type": "comparison",
"measurements": [ "count(teams[red].players)" ],
"referenceValue": "count(teams[blue].players)",
"operation": "=" // other operations: !=, <, <=, >, >=
"referenceValue": "count(teams[green].players)",
"operation": "="
}],
"expansions": [{
"target": "rules[FairTeamSkill].maxDistance",
"steps": [{
"waitTimeSeconds": 5,
"value": 50
}, {
"waitTimeSeconds": 15,
"value": 100
}]
}]
}
I validate it every time, expecting it to be accepted, but it isn't; I keep getting this error message:
Rule set*
Encountered JSON parsing error: Unexpected character ('"' (code 34)): was expecting comma to separate Object entries at [Source: { "name": "Normal_Game", "ruleLanguageVersion": "1.0", "playerAttributes": [{ "name": "skill", "type": "number", "default": 10 }], "teams": [{ "name": "red", "maxPlayers": 1, "minPlayers": 1 }, { "name": "blue", "maxPlayers": 1, "minPlayers": 1 },{ "name": "green", "maxPlayers": 1, "minPlayers":1 }], "rules": [{ "name": "FairTeamSkill", "description": "The average skill of players in each team is within 10 points from the average skill of all players in the match", "type": "distance", // get skill values for players in each team and average separately to produce list of two numbers "measurements": [ "avg(teams[*].players.attributes[skill])" ], // get skill values for players in each team, flatten into a single list, and average to produce an overall average "referenceValue": "avg(flatten(teams[*].players.attributes[skill]))", "maxDistance": 10 // minDistance would achieve the opposite result }, { "name": "EqualTeamSizes", "description": "Only launch a game when the number of players in each team matches, e.g. 4v4, 5v5, 6v6, 7v7, 8v8", "type": "comparison", "measurements": [ "count(teams[red].players)" ], "referenceValue": "count(teams[blue].players)", "operation": "=" // other operations: !=, <, <=, >, >= "referenceValue": "count(teams[green].players)", "operation": "=" }], "expansions": [{ "target": "rules[FairTeamSkill].maxDistance", "steps": [{ "waitTimeSeconds": 5, "value": 50 }, { "waitTimeSeconds": 15, "value": 100 }] }] }; line: 38, column: 10]
You seem to have these 2:
"referenceValue":
"operation":
defined twice in EqualTeamSizes rules, that might cause issues. And a missing comma after "operation": "="
{
"name": "EqualTeamSizes",
"description": "Only launch a game when the number of players in each team matches, e.g. 4v4, 5v5, 6v6, 7v7, 8v8",
"type": "comparison",
"measurements": [ "count(teams[red].players)" ],
"referenceValue": "count(teams[blue].players)",
"operation": "=" // other operations: !=, <, <=, >, >=
"referenceValue": "count(teams[green].players)",
"operation": "="
}

Want to output two values from each line of a huge JSONL file in R Studio

I'm walking through a huge JSONL file (100G, 100M rows) line by line extracting two key values from the data. Ideally, I want this written to a file with two columns. I'm a real beginner here.
Here is an example of the JSON on each row of the file referenced on my C drive:
https://api.unpaywall.org/v2/10.6118/jmm.2017.23.2.135?email=YOUR_EMAIL
or:
{
"best_oa_location": {
"evidence": "open (via page says license)",
"host_type": "publisher",
"is_best": true,
"license": "cc-by-nc",
"pmh_id": null,
"updated": "2018-02-14T11:18:21.978814",
"url": "FAKEURL",
"url_for_landing_page": "URL2",
"url_for_pdf": "URL4",
"version": "publishedVersion"
},
"data_standard": 2,
"doi": "10.6118/jmm.2017.23.2.135",
"doi_url": "URL5",
"genre": "journal-article",
"is_oa": true,
"journal_is_in_doaj": false,
"journal_is_oa": false,
"journal_issns": "2288-6478,2288-6761",
"journal_name": "Journal of Menopausal Medicine",
"oa_locations": [
{
"evidence": "open (via page says license)",
"host_type": "publisher",
"is_best": true,
"license": "cc-by-nc",
"pmh_id": null,
"updated": "2018-02-14T11:18:21.978814",
"url": "URL6",
"url_for_landing_page": "URL7",
"url_for_pdf": "URL8",
"version": "publishedVersion"
},
{
"evidence": "oa repository (via OAI-PMH doi match)",
"host_type": "repository",
"is_best": false,
"license": "cc-by-nc",
"pmh_id": "oai:pubmedcentral.nih.gov:5606912",
"updated": "2017-10-21T18:12:39.724143",
"url": "URL9",
"url_for_landing_page": "URL11",
"url_for_pdf": "URL12",
"version": "publishedVersion"
},
{
"evidence": "oa repository (via pmcid lookup)",
"host_type": "repository",
"is_best": false,
"license": null,
"pmh_id": null,
"updated": "2018-10-11T01:49:34.280389",
"url": "URL13",
"url_for_landing_page": "URL14",
"url_for_pdf": null,
"version": "publishedVersion"
}
],
"published_date": "2017-01-01",
"publisher": "The Korean Society of Menopause (KAMJE)",
"title": "A Case of Granular Cell Tumor of the Clitoris in a Postmenopausal Woman",
"updated": "2018-06-20T20:31:37.509896",
"year": 2017,
"z_authors": [
{
"affiliation": [
{
"name": "Department of Obstetrics and Gynecology, Soonchunhyang University Cheonan Hospital, University of Soonchunhyang College of Medicine, Cheonan, Korea."
}
],
"family": "Min",
"given": "Ji-Won"
},
{
"affiliation": [
{
"name": "Department of Obstetrics and Gynecology, Soonchunhyang University Cheonan Hospital, University of Soonchunhyang College of Medicine, Cheonan, Korea."
}
],
"family": "Kim",
"given": "Yun-Sook"
}
]
}
Here's the code i'm using/wrote:
library (magrittr)
library (jqr)
con = file("C:/users/ME/desktop/miniunpaywall.jsonl", "r");
while ( length(line <- readLines(con, n = -1)) > 0) {
write.table( line %>% jq ('.doi,.best_oa_location.license'), file='test.txt', quote=FALSE, row.names=FALSE);}
What results from this is a line of text for each row of JSON that looks like this:
"10.1016/j.ijcard.2018.10.014,CC-BY"
This is effectively:
"[DOI],[LICENSE]"
I want ideally to have the output be:
[DOI] tab [LICENSE]
I believe my problem is that I'm writing the values as a string into a single column when i say:
write.table( line %>% jq ('.doi,.best_oa_location.license')
I haven't figured out a way to remove the quotes I'm getting around each line in my file, or how I could separate the two values with a tab. I feel I'm pretty close. Help!

Resources