-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathloadRDFFromJsonLines.py
38 lines (26 loc) · 1.26 KB
/
loadRDFFromJsonLines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from zipfile import ZipFile
from neo4j import GraphDatabase
# Cypher statement run once per batch: it unwinds the $payload list, hands each
# element to semantics.importRDFSnippet as JSON-LD, and returns the termination
# status together with the summed loaded/parsed triple counts.
# The trailing space inside each fragment keeps the clauses separated once the
# adjacent literals are joined.
cypher_neosemantics = (
    'UNWIND $payload as rdf_fragment '
    'CALL semantics.importRDFSnippet(rdf_fragment,"JSON-LD") '
    'YIELD terminationStatus, triplesLoaded, triplesParsed, extraInfo '
    'RETURN terminationStatus, sum(triplesLoaded) as totalLoaded, sum(triplesParsed) as totalParsed '
)

# Name of the JSON Lines member inside the archive; the archive itself is
# opened as this name with '.zip' appended.
jsonl_file_name = "lines.jsonl"

# Number of lines collected before a batch is submitted.
batch_size = 17

# Bolt URI handed to GraphDatabase.driver.
uri = "bolt://localhost:7687"
def load_batch(tx, batch):
    """Run ``cypher_neosemantics`` for one batch and print what came back.

    Args:
        tx: Object exposing ``run(query, **params)``. The script passes this
            function to the session's transaction helpers, which supply it.
        batch: Sized collection bound to the query's ``$payload`` parameter.

    Prints the batch size first, then one line per record returned by the
    query with its termination status and triple totals.
    """
    batch_len = len(batch)
    print("Submitting batch of size " + str(batch_len))

    result = tx.run(cypher_neosemantics, payload=batch)
    for row in result:
        print(
            'status: ', row["terminationStatus"],
            ', triplesLoaded: ', row["totalLoaded"],
            ', triplesParsed: ', row["totalParsed"],
        )
# Stream the zipped JSON Lines file into Neo4j, submitting the lines in groups
# of `batch_size`, each group in its own write transaction.
driver = GraphDatabase.driver(uri, auth=("neo4j", "neo"))
try:
    with driver.session() as session:
        # Named `archive` rather than `zip` so the builtin is not shadowed.
        with ZipFile(jsonl_file_name + '.zip', 'r') as archive:
            with archive.open(jsonl_file_name, mode='r') as jsonl_file:
                batch = []
                for line in jsonl_file:
                    # The archive member yields bytes; the query payload is
                    # sent as text.
                    batch.append(line.decode('utf-8'))
                    if len(batch) == batch_size:
                        session.write_transaction(load_batch, batch)
                        batch = []
                # Flush the trailing partial batch. load_batch imports data,
                # so it must run in a write transaction like the full batches
                # (a read transaction can be rejected or routed to a
                # read-only cluster member). Skip it when nothing is left so
                # no empty request is sent.
                if batch:
                    session.write_transaction(load_batch, batch)
finally:
    # Release the driver's connections even if the import fails part-way.
    driver.close()