DynamoDB: list_append alternative for sets - amazon-dynamodb

I am trying to do an update operation on a dynamodb string set attribute. For lists, the operation would be
set #key = list_append(if_not_exists(#key, :empty_list), :newValue)
But this produces a list attribute. Is there an alternative for list_append but for sets?

Since DynamoDB can't store empty sets this is actually fairly easy, you can just use the ADD operator.
Here's an example I've built in Python:
import boto3
TABLE_NAME = "set-demo"
def create_table():
    """Create the demo table with a string hash key ``PK`` and range key ``SK``.

    The table is created with on-demand (``PAY_PER_REQUEST``) billing under
    the name held in ``TABLE_NAME``.
    """
    key_names = ("PK", "SK")
    attribute_definitions = [
        {"AttributeName": key_name, "AttributeType": "S"}
        for key_name in key_names
    ]
    key_schema = [
        {"AttributeName": "PK", "KeyType": "HASH"},
        {"AttributeName": "SK", "KeyType": "RANGE"},
    ]
    client = boto3.client("dynamodb")
    client.create_table(
        AttributeDefinitions=attribute_definitions,
        TableName=TABLE_NAME,
        KeySchema=key_schema,
        BillingMode="PAY_PER_REQUEST",
    )
def add_to_set(item_id: str, value: str) -> None:
    """Add ``value`` to the string-set attribute ``values`` of one item.

    The item is addressed by ``PK = "ITEM#<item_id>"`` and
    ``SK = "METADATA"``. The update uses the ``ADD`` action, so no
    ``if_not_exists`` guard is needed in the expression.

    Args:
        item_id: Identifier used to build the partition key.
        value: String to add to the set.
    """
    table = boto3.resource("dynamodb").Table(TABLE_NAME)
    table.update_item(
        Key={
            "PK": f"ITEM#{item_id}",
            # Plain literal: the original f-prefix had no placeholders.
            "SK": "METADATA",
        },
        UpdateExpression="ADD #set_name :set_value",
        ExpressionAttributeNames={
            "#set_name": "values",
        },
        ExpressionAttributeValues={
            # Must be a Python set so boto3 sends it as a set type.
            ":set_value": {value},
        },
    )
if __name__ == "__main__":
    # Uncomment on the first run to provision the table:
    # create_table()
    # "value_1" is sent twice on purpose to show the set keeps one copy.
    for demo_value in ("value_1", "value_2", "value_1"):
        add_to_set("a", demo_value)
In python it's sufficient to pass a value with the datatype set in the ExpressionAttributeValues for boto3 to know it needs to convert it into a set under the hood.
When I call add_to_set for the first time, it will create the set attribute and subsequent calls are just updates to the attribute.
This is what the item looks like in the end:
{
"PK": {
"S": "ITEM#a"
},
"SK": {
"S": "METADATA"
},
"values": {
"SS": [
"value_1",
"value_2"
]
}
}

Related

How to get sum of value from Json Array in Vue

My json array is
"test": [
{
"name": "item1",
"value": "440"
},
{
"name": "item2",
"value": "220"
},
{
"name": "item3",
"value": "46"
}
]
I'm calling function from template as
<template>
<td>{{ calculateSumofValues(test) }}</td>
</template>
Script function is
<script setup lang="ts">
const calculateSumofValues = (test?:Object) => {
console.log("test", test)
// calculate sum of values
};
</script>
I want to calculate total values of every item on the array (440+220+46 = 706)
Please help me to solve this issue.
You could use reduce() function to do something over every element in an array and accumulate results somehow, useful for summing over values in the array.
const test = [{
"name": "item1",
"value": "440"
},
{
"name": "item2",
"value": "220"
},
{
"name": "item3",
"value": "46"
}
]
/**
 * Sum the numeric `value` field of every entry in `test`.
 *
 * @param {Array<{name: string, value: string}>} [test=[]] entries whose
 *   `value` is a decimal number stored as a string; defaults to an empty
 *   array so a call made before the data has loaded returns 0.
 * @returns {number} the total of all parsed values (0 for an empty array).
 */
const calculateSumofValues = (test = []) => {
  console.log("test", test)
  // Always parse as base 10 so the result never depends on the string's prefix.
  return test.reduce((acc, cur) => acc + Number.parseInt(cur.value, 10), 0)
};
console.log(calculateSumofValues(test))

Best way to retrieve document with nested JSON and limit

Suppose we have a structure:
{
"nested_items": [
{
"nested_sample0": "1",
"nested_sample1": "test",
"nested_sample2": "test",
"nested_sample3": {
"type": "type"
},
"nested_sample": null
},
{
"nested_sample0": "1",
"nested_sample1": "test",
"nested_sample2": "test",
"nested_sample3": {
"type": "type"
},
"nested_sample1": null
},
...
],
"sample1": 1233,
"id": "ed68ca34-6b59-4687-a557-bdefc9ec2f4b",
"sample2": "",
"sample3": "test",
"sample4": "test",
"_ts": 1656503348
}
I want to retrieve documents by id, but with a limit on the "nested_items" field. As far as I know, LIMIT and OFFSET are not supported in subqueries. Is there any way to do this other than splitting it into two queries? Maybe a UDF or something else?
You can use the function ARRAY_SLICE assuming the array is ordered.
Example data:
{
"name": "John",
"details": [
{
"id": 1
},
{
"id": 2
},
{
"id": 3
}
]
}
Example queries
-- First 2 items from nested array
SELECT c.name, ARRAY_SLICE(c.details, 0, 2) as details
FROM c
-- Last 2 items from nested array
SELECT c.name, ARRAY_SLICE(c.details, ARRAY_LENGTH(c.details) - 2, 2) as details
FROM c

Dynamically Parse Child Nodes in JSON

I have a deserialized object that I want to dynamically loop through to return the related results. The response package looks like so:
{"RatingResponse":
{"Success":"true",
"Message":"",
"QuoteID":"57451",
"LoadNum":"57451",
"Rates":
{"Rate":
[
{"SCAC":"test1",
"CarrierName":"TEST1",
"TransitTime":"1",
"ServiceLevel":"D",
"TotalCost":"1,031.82",
"ThirdPartyCharge":"1,031.82",
"Accessorials":
{"Accessorial":
[
{"Code":"400",
"Cost":"1,655.55",
"Description":"Freight"
},
{"Code":"DSC",
"Cost":"-952.77",
"Description":"Discount"
},
{"Code":"FUE",
"Cost":"329.04",
"Description":"Fuel Surcharge"
}
]
},
"QuoteNumber":""
},
{"SCAC":"test2",
"CarrierName":"TEST2",
"TransitTime":"1",
"ServiceLevel":"D",
"TotalCost":"1,031.82",
"ThirdPartyCharge":"1,031.82",
"Accessorials":
{"Accessorial":
[
{"Code":"400",
"Cost":"1,655.55",
"Description":"Freight"
},
{"Code":"DSC",
"Cost":"-952.77",
"Description":"Discount"
},
{"Code":"FUE",
"Cost":"329.04",
"Description":"Fuel Surcharge"
}
]
},
"QuoteNumber":""
}
]
},
"AverageTotalCost":"1,031.82"
}
}
I have parsed the response data so that there is less information to work with, especially since I only need the Accessorial Costs. The parsed response looks like
[
{
"SCAC": "test1",
"CarrierName": "TEST1",
"TransitTime": "1",
"ServiceLevel": "D",
"TotalCost": "1,031.82",
"ThirdPartyCharge": "1,031.82",
"Accessorials": {
"Accessorial": [
{
"Code": "400",
"Cost": "1,655.55",
"Description": "Freight"
},
{
"Code": "DSC",
"Cost": "-952.77",
"Description": "Discount"
},
{
"Code": "FUE",
"Cost": "329.04",
"Description": "Fuel Surcharge"
}
]
},
"QuoteNumber": ""
},
{
"SCAC": "test2",
"CarrierName": "TEST2",
"TransitTime": "1",
"ServiceLevel": "D",
"TotalCost": "1,031.82",
"ThirdPartyCharge": "1,031.82",
"Accessorials": {
"Accessorial": [
{
"Code": "400",
"Cost": "1,655.55",
"Description": "Freight"
},
{
"Code": "DSC",
"Cost": "-952.77",
"Description": "Discount"
},
{
"Code": "FUE",
"Cost": "329.04",
"Description": "Fuel Surcharge"
}
]
},
"QuoteNumber": ""
}
]
The problem I am facing is that I will never know how many Rate items will come back in the response data, nor will I know the exact amount of Accessorial Costs. I'm hoping to capture the Rate child node counts and the Accessorial child node counts per Rate. Here's what I have so far.
Root rootObject = Newtonsoft.Json.JsonConvert.DeserializeObject<Root>(responseFromServer);
//rate stores the parsed response data
JArray rate = (JArray)JObject.Parse(responseFromServer)["RatingResponse"]["Rates"]["Rate"];
var rate2 = rate.ToString();
//this for loop works as expected. it grabs the number of Rate nodes (in this example, 2)
for (int i = 0; i < rate.Count(); i++)
{
dynamic test2 = rate[i];
//this is where I'm struggling
dynamic em = (JArray)JObject.Parse(test2)["Accessorials"]["Accessorial"].Count();
for (int j = 0; j < em; j++)
{
string test3 = test2.Accessorials.Accessorial[j].Cost;
System.IO.File.AppendAllText(logPath, Environment.NewLine + test3 + Environment.NewLine);
}
}
I apologize in advance for the bad formatting and odd variable names - I'm obviously still testing the functionality, so I've been using random variables.
Where I'm struggling (as notated above) is getting to the Accessorial node to count how many items are in its array. I was thinking I could parse the first array (starting with SCAC data) and extend down to the Accessorial node, but I'm not having any luck.
Any help is GREATLY appreciated, especially since I am new to this type of code and have spent the majority of the day trying to resolve this.
you can try this
var rates = (JArray)JObject.Parse(json)["RatingResponse"]["Rates"]["Rate"];
var costs = rates.Select(r => new
{
CarrierName = r["CarrierName"],
Costs = ((JArray)((JObject)r["Accessorials"])["Accessorial"])
.Where(r => (string)r["Description"] != "Discount")
.Select(r => (double)r["Cost"]).Sum()
}).ToList();
result
[
{
"CarrierName": "TEST1",
"Costs": 1984.59
},
{
"CarrierName": "TEST2",
"Costs": 1984.59
}
]

aws glue job to import dynamodb data

We are trying to do DynamoDB migration from prod account to stage account.
In the source account, we are making use of "Export" feature of DDB to put the compressed .json.gz files into destination S3 bucket.
We have written a Glue script which reads the exported .json.gz files and writes them to the DDB table.
We are making the code generic, so we should be able to migrate any DDB table from prod to stage account.
As part of that process, while testing we are facing issues when we are trying to write a NUMBER SET data to target DDB table.
Following is the sample snippet which is raising ValidationException when trying to insert into DDB
from decimal import Decimal
def number_set(datavalue):
    """Convert a DynamoDB-JSON number-set payload into a set of Decimals.

    Args:
        datavalue: Iterable of numeric strings, e.g. ``['0', '1']``.

    Returns:
        A ``set`` containing ``Decimal(value)`` for every entry.
    """
    return {Decimal(value) for value in datavalue}
When running the code, we are getting following ValidationException
An error occurred while calling o82.pyWriteDynamicFrame. Supplied AttributeValue is empty, must contain exactly one of the supported datatypes (Service: AmazonDynamoDBv2; Status Code: 400; Error Code: ValidationException; Request ID: UKEU70T0BLIKN0K2OL4RU56TGVVV4KQNSO5AEMVJF66Q9ASUAAJG; Proxy: null)
However, if we use int(value) instead of Decimal(value), no ValidationException is thrown and the job succeeds.
I feel that write_dynamic_frame_from_options will try to infer schema based on the values the element contains, if the element has "int" values then the datatype would be "NS", but if the element contains all "Decimal type" values, then it is not able to infer the datatype.
The glue job we have written is
# Read the JSON export file(s) located at file_path on S3 into a DynamicFrame.
# NOTE(review): glue_context and file_path are defined outside this snippet.
dyf = glue_context.create_dynamic_frame_from_options(
connection_type="s3",
connection_options={
"paths": [file_path]
},
format="json",
transformation_ctx = "dyf",
recurse = True,
)
def number_set(datavalue):
    """Convert a DynamoDB-JSON number-set payload into a set of Decimals.

    The converted values are also printed, in input order, as a diagnostic.

    Args:
        datavalue: Iterable of numeric strings, e.g. ``['0', '1']``.

    Returns:
        A ``set`` containing ``Decimal(value)`` for every entry.
    """
    list_of_values = [Decimal(value) for value in datavalue]
    print("list of values ")
    print(list_of_values)
    return set(list_of_values)
def parse_list(datavalue):
    """Convert a DynamoDB-JSON list payload into a plain Python list.

    Args:
        datavalue: Iterable of typed attribute values, e.g.
            ``[{"S": "abc"}, {"N": "123"}]``.

    Returns:
        A list with every element passed through ``generic_conversion``,
        in the original order.
    """
    # "element" rather than "object", which would shadow the builtin.
    return [generic_conversion(element) for element in datavalue]
def generic_conversion(value_dict):
    """Convert one DynamoDB-JSON typed attribute value into a Python value.

    ``value_dict`` maps a DynamoDB type descriptor to its payload, e.g.
    ``{"N": "123"}`` or ``{"L": [{"S": "abc"}]}``. A DynamoDB attribute
    value carries exactly one descriptor, so only the first entry is used.

    Conversions:
        N    -> ``Decimal``
        S    -> returned unchanged
        BOOL -> returned unchanged
        NULL -> ``None``
        NS   -> ``number_set(payload)``
        SS   -> ``set`` of the strings
        B    -> the payload string encoded to ASCII bytes
        BS   -> ``set`` of the payload strings encoded to ASCII bytes
        M    -> ``construct_map(payload)``
        L    -> ``parse_list(payload)``

    Raises:
        ValueError: if ``value_dict`` is empty or its descriptor is not
            supported. Previously both cases reached ``return value`` with
            ``value`` unbound and surfaced as ``UnboundLocalError``.
    """
    for datatype, datavalue in value_dict.items():
        if datatype == 'N':
            return Decimal(datavalue)
        if datatype in ('S', 'BOOL'):
            return datavalue
        if datatype == 'NULL':
            return None
        if datatype == 'NS':
            return number_set(datavalue)
        if datatype == 'SS':
            return set(datavalue)
        if datatype == 'B':
            # NOTE(review): keeps the text as received (no base64 decoding),
            # as the original did; confirm this is what the writer expects.
            return datavalue.encode('ascii')
        if datatype == 'BS':
            return {item.encode('ascii') for item in datavalue}
        if datatype == 'M':
            return construct_map(datavalue)
        if datatype == 'L':
            return parse_list(datavalue)
        raise ValueError(f"unsupported DynamoDB type descriptor: {datatype!r}")
    raise ValueError("empty DynamoDB attribute value")
def construct_map(row_dict):
    """Convert a DynamoDB-JSON map into a plain Python dict.

    Args:
        row_dict: Mapping of attribute name to a typed attribute value,
            e.g. ``{"date": {"N": "20210916"}}``.

    Returns:
        A new dict with the same keys, each value passed through
        ``generic_conversion`` (so ``N`` payloads become ``Decimal``).
    """
    return {
        key: generic_conversion(value_dict)
        for key, value_dict in row_dict.items()
    }
def map_function(rec):
    """Convert the DynamoDB-JSON item stored under ``rec["Item"]``.

    Returns the result of ``construct_map`` for that item.
    """
    return construct_map(rec["Item"])
# Run map_function over the frame read above to get the converted records.
mapped_dyF = Map.apply(frame = dyf, f = map_function, transformation_ctx = "mapped_dyF")
# Write the converted frame to the DynamoDB table named by destination_table
# (region us-east-1, write-throughput percent option set to "0.5").
# NOTE(review): destination_table is defined outside this snippet.
datasink2 = glue_context.write_dynamic_frame_from_options(
frame=mapped_dyF,
connection_type="dynamodb",
connection_options={
"dynamodb.region": "us-east-1",
"dynamodb.output.tableName": destination_table,
"dynamodb.throughput.write.percent": "0.5"
},
transformation_ctx = "datasink2"
)
can anyone help us in how can we unblock from this situation?
Record that we are trying to insert
{
"region": {
"S": "to_delete"
},
"date": {
"N": "20210916"
},
"number_set": {
"NS": [
"0",
"1"
]
},
"test": {
"BOOL": false
},
"map": {
"M": {
"test": {
"S": "value"
},
"test2": {
"S": "value"
},
"nestedmap": {
"M": {
"key": {
"S": "value"
},
"nestedmap1": {
"M": {
"key1": {
"N": "0"
}
}
}
}
}
}
},
"binary": {
"B": "QUFBY2Q="
},
"list": {
"L": [
{
"S": "abc"
},
{
"S": "def"
},
{
"N": "123"
},
{
"M": {
"key2": {
"S": "value2"
},
"nestedmaplist": {
"M": {
"key3": {
"S": "value3"
}
}
}
}
}
]
}
}

CRUD support for list of Dicts

My goal is to add Weaviate support to the pyLodStorage project
Specifically I'd like to use the sample data from:
https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py
Which has
a few records of Persons from the Royal family
a city list with a few thousand entries
an artificial list of records with as many records as you wish
as examples.
All data is tabular. Some basic python types like:
str
bool
int
float
date
datetime
need to be supported.
I created the project http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest and a script to run Weaviate via docker compose. There is a python unit test which used to work with the Weaviate Python client 0.4.1
I am trying to use the information from https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html to refactor this unit test but don't know how to do it.
What needs to be done to get the CRUD tests running as e.g. in the other three tests:
https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests
for
JSON
SPARQL
SQL
i am especially interested in the "round-trip" handling of list of dicts (aka "Table") with the standard data types above. So I'd like to create a list of dicts and then:
derive the schema automatically by looking at some sample records
check if the schema already exists and, if so, delete it
create the schema
check if the data already exists and, if so, delete it
add the data and store it
optionally store the schema for further reference
restore the data with or without using the schema information
check that the restored data (list of Dicts) is the same as the original data
Created on 2020-07-24
#author: wf
'''
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
# https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
def setUp(self):
self.port=8153
self.host="localhost"
#if getpass.getuser()=="wf":
# self.host="zeus"
# self.port=8080
pass
def getClient(self):
self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
return self.client
def tearDown(self):
pass
def testRunning(self):
'''
make sure weaviate is running
'''
w=self.getClient()
self.assertTrue(w.is_live())
self.assertTrue(w.is_ready())
def testWeaviateSchema(self):
''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
w = self.getClient()
#contains_schema = w.schema.contains()
try:
w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
except:
pass
entries=[
[ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
[ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
[ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
]
for entry in entries:
dict,type,uid=entry
try:
w.create(dict,type,uid)
except weaviate.exceptions.ThingAlreadyExistsException as taee:
print ("%s already created" % dict['name'])
pass
def testPersons(self):
return
w = self.getClient()
schema = {
"actions": {"classes": [],"type": "action"},
"things": {"classes": [{
"class": "Person",
"description": "A person such as humans or personality known through culture",
"properties": [
{
"cardinality": "atMostOne",
"dataType": ["text"],
"description": "The name of this person",
"name": "name"
}
]}],
"type": "thing"
}
}
w.create_schema(schema)
w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
w.create_thing({"name": "Alan Turing"}, "Person")
w.create_thing({"name": "John von Neumann"}, "Person")
w.create_thing({"name": "Tim Berners-Lee"}, "Person")
def testEventSchema(self):
'''
https://stackoverflow.com/a/63077495/1497139
'''
return
schema = {
"things": {
"type": "thing",
"classes": [
{
"class": "Event",
"description": "event",
"properties": [
{
"name": "acronym",
"description": "acronym",
"dataType": [
"text"
]
},
{
"name": "inCity",
"description": "city reference",
"dataType": [
"City"
],
"cardinality": "many"
}
]
},
{
"class": "City",
"description": "city",
"properties": [
{
"name": "name",
"description": "name",
"dataType": [
"text"
]
},
{
"name": "hasEvent",
"description": "event references",
"dataType": [
"Event"
],
"cardinality": "many"
}
]
}
]
}
}
client = self.getClient()
if not client.contains_schema():
client.create_schema(schema)
event = {"acronym": "example"}
client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
city = {"name": "Amsterdam"}
client.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
time.sleep(2.0)
client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
The unit test for the connection, schema and data objects you show above works like this with the Python client v1.x (see the inline comments for what's changed):
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
# https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
def setUp(self):
self.port=8153
self.host="localhost"
#if getpass.getuser()=="wf":
# self.host="zeus"
# self.port=8080
pass
def getClient(self):
self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
return self.client
def tearDown(self):
pass
def testRunning(self):
'''
make sure weaviate is running
'''
w=self.getClient()
self.assertTrue(w.is_live())
self.assertTrue(w.is_ready())
def testWeaviateSchema(self):
''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
w = self.getClient()
#contains_schema = w.schema.contains()
try:
w.schema.create("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json") # instead of w.create_schema, see https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html#creating-your-first-schema-with-the-python-client
except:
pass
entries=[
[ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
[ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
[ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
]
for entry in entries:
dict,type,uid=entry
try:
w.data_object.create(dict,type,uid) # instead of w.create(dict,type,uid), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#example-request-1
except weaviate.exceptions.ThingAlreadyExistsException as taee:
print ("%s already created" % dict['name'])
pass
def testPersons(self):
return
w = self.getClient()
schema = {
"actions": {"classes": [],"type": "action"},
"things": {"classes": [{
"class": "Person",
"description": "A person such as humans or personality known through culture",
"properties": [
{
"cardinality": "atMostOne",
"dataType": ["text"],
"description": "The name of this person",
"name": "name"
}
]}],
"type": "thing"
}
}
w.schema.create(schema) # instead of w.create_schema(schema)
w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person") # instead of w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
w.data_object.create({"name": "Alan Turing"}, "Person")
w.data_object.create({"name": "John von Neumann"}, "Person")
w.data_object.create({"name": "Tim Berners-Lee"}, "Person")
def testEventSchema(self):
'''
https://stackoverflow.com/a/63077495/1497139
'''
return
schema = {
"things": {
"type": "thing",
"classes": [
{
"class": "Event",
"description": "event",
"properties": [
{
"name": "acronym",
"description": "acronym",
"dataType": [
"text"
]
},
{
"name": "inCity",
"description": "city reference",
"dataType": [
"City"
],
"cardinality": "many"
}
]
},
{
"class": "City",
"description": "city",
"properties": [
{
"name": "name",
"description": "name",
"dataType": [
"text"
]
},
{
"name": "hasEvent",
"description": "event references",
"dataType": [
"Event"
],
"cardinality": "many"
}
]
}
]
}
}
client = self.getClient()
if not client.contains_schema():
client.schema.create(schema) # instead of client.create_schema(schema)
event = {"acronym": "example"}
client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde") # instead of client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
city = {"name": "Amsterdam"}
client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
time.sleep(2.0)
client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde") # instead of client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde"), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#add-a-cross-reference
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
There's no support for automatically deriving a schema from a list of dict (or other formats) yet. This could, as you mention, be a good convenience feature, so we add this to Weaviate's feature suggestions!
The new version of Weaviate is now available (v1.2.1 is the latest release at the time of writing this). With this version a lot of things were removed and even more were added. One of the major breaking changes is that actions and things were removed and objects were introduced instead. All the changes and features of Weaviate v1.2 can be used with the weaviate-client Python library v2.3.
Most of the current weaviate-client functionality is explained, with examples of how it works, in this article.
Here are the same unit tests, but for Weaviate v1.2.1 and written using weaviate-client v2.3.1:
import unittest
import weaviate
import time
#import getpass
person_schema = {
"classes": [
{
"class": "Person",
"description": "A person such as humans or personality known through culture",
"properties": [
{
"name": "name",
"description": "The name of this person",
"dataType": ["text"]
}
]
},
{
"class": "Group",
"description": "A set of persons who are associated with each other over some common properties",
"properties": [
{
"name": "name",
"description": "The name under which this group is known",
"dataType": ["text"]
},
{
"name": "members",
"description": "The persons that are part of this group",
"dataType": ["Person"]
}
]
}
]
}
class TestWeaviate(unittest.TestCase):
    """Tests against a running Weaviate instance using the weaviate-client v2 API.

    Client documentation:
    https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
    """

    def setUp(self):
        """Set the host and port of the Weaviate instance under test."""
        self.port = 8080
        self.host = "localhost"
        # if getpass.getuser()=="wf":
        #     self.host="zeus"
        #     self.port=8080

    def getClient(self):
        """Create a client for the configured host/port, store and return it."""
        self.client = weaviate.Client(f"http://{self.host}:{self.port}")
        return self.client

    def tearDown(self):
        """Nothing to clean up."""

    def _reset_schema(self, client, schema):
        """Replace whatever schema the instance currently holds with ``schema``.

        Creating a schema on an instance that already has one results in an
        error, so an existing schema is deleted first. Deleting the schema
        removes all the data objects too.
        """
        if client.schema.contains():
            client.schema.delete_all()
        client.schema.create(schema)

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w = self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        """Create the person/group schema and add three data objects to it."""
        w = self.getClient()
        # contains_schema = w.schema.contains()
        # schema.create replaces the old w.create_schema(person_schema)
        self._reset_schema(w, person_schema)
        entries = [
            ({"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"),
            ({"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"),
            ({"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6"),
        ]
        # Descriptive names: the original unpacked into "dict" and "type",
        # shadowing the builtins.
        for properties, class_name, uid in entries:
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#create-a-data-object
                w.data_object.create(properties, class_name, uid)
            # ObjectAlreadyExistsException is the correct exception starting weaviate-client 2.0.0
            except weaviate.exceptions.ObjectAlreadyExistsException:
                print(f"{properties['name']} already created")

    @unittest.skip("disabled in the original listing via an early return")
    def testPersons(self):
        """Create a minimal Person schema and add four persons."""
        w = self.getClient()
        schema = {
            # "actions": {"classes": [],"type": "action"}, `actions` and `things`
            # were removed in weaviate v1.0 and in weaviate-client v2.0.
            # Now there is only `objects`
            "classes": [
                {
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            # "cardinality": "atMostOne", was removed in weaviate v1.0 and weaviate-client v2.0
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name",
                        }
                    ],
                }
            ]
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema)
        # instead of w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Alan Turing"}, "Person")
        w.data_object.create({"name": "John von Neumann"}, "Person")
        w.data_object.create({"name": "Tim Berners-Lee"}, "Person")

    @unittest.skip("disabled in the original listing via an early return")
    def testEventSchema(self):
        '''
        https://stackoverflow.com/a/63077495/1497139
        '''
        schema = {
            # "things": { , was removed in weaviate v1.0 and weaviate-client v2.0
            # "type": "thing", was removed in weaviate v1.0 and weaviate-client v2.0
            "classes": [
                {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                        {
                            "name": "acronym",
                            "description": "acronym",
                            "dataType": ["text"],
                        },
                        {
                            "name": "inCity",
                            "description": "city reference",
                            "dataType": ["City"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        },
                    ],
                },
                {
                    "class": "City",
                    "description": "city",
                    "properties": [
                        {
                            "name": "name",
                            "description": "name",
                            "dataType": ["text"],
                        },
                        {
                            "name": "hasEvent",
                            "description": "event references",
                            "dataType": ["Event"],
                            # "cardinality": "many", was removed in weaviate v1.0 and weaviate-client v2.0
                        },
                    ],
                },
            ]
        }
        client = self.getClient()
        # This test would fail when run against the same Weaviate instance as
        # testWeaviateSchema: a schema already exists there, so the new schema
        # would not be created and the call would result in an error.
        # Delete the existing schema and create the new one instead.
        # schema.contains() replaces client.contains_schema() and
        # schema.create() replaces client.create_schema(schema).
        self._reset_schema(client, schema)
        event = {"acronym": "example"}
        # instead of client.create(...)
        client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
        time.sleep(2.0)
        # instead of client.add_reference(...), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#cross-references
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()

Resources