@@ -20,7 +20,7 @@ def update_exception_db(self, file_name, exp_msg):
             is_cancelled_status = result[0]['is_cancelled']
             if bool(is_cancelled_status) == True:
                 job_status = 'Cancelled'
-            self.graph.query("""MERGE(d:__Document__ {fileName:$fName}) SET d.status = $status, d.errorMessage = $error_msg""",
+            self.graph.query("""MERGE(d:Document {fileName:$fName}) SET d.status = $status, d.errorMessage = $error_msg""",
                              {"fName":file_name, "status":job_status, "error_msg":exp_msg})
         except Exception as e:
             error_message = str(e)
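
This hunk only renames the label (`__Document__` → `Document`); the query itself is an upsert: `MERGE` finds or creates the node by `fileName`, then `SET` writes the two status fields. A minimal standalone sketch of the same pattern, assuming a langchain `Neo4jGraph` handle (connection details invented):

```python
from langchain_community.graphs import Neo4jGraph

# Hypothetical connection; later sketches reuse this `graph` handle.
graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="secret")

# MERGE finds-or-creates the Document node; SET overwrites just these fields.
graph.query(
    """MERGE (d:Document {fileName: $fName})
       SET d.status = $status, d.errorMessage = $error_msg""",
    {"fName": "report.pdf", "status": "Failed", "error_msg": "example error"},
)
```
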
@@ -31,7 +31,7 @@ def create_source_node(self, obj_source_node:sourceNode):
         try:
             job_status = "New"
             logging.info("creating source node if does not exist")
-            self.graph.query("""MERGE(d:__Document__ {fileName:$fn}) SET d.fileSize = $fs, d.fileType = $ft,
+            self.graph.query("""MERGE(d:Document {fileName:$fn}) SET d.fileSize = $fs, d.fileType = $ft,
                         d.status = $st, d.url = $url, d.awsAccessKeyId = $awsacc_key_id,
                         d.fileSource = $f_source, d.createdAt = $c_at, d.updatedAt = $u_at,
                         d.processingTime = $pt, d.errorMessage = $e_message, d.nodeCount= $n_count,
@@ -95,7 +95,7 @@ def update_source_node(self, obj_source_node:sourceNode):
             param = {"props":params}

             print(f'Base Param value 1 : {param}')
-            query = "MERGE(d:__Document__ {fileName:$props.fileName}) SET d += $props"
+            query = "MERGE(d:Document {fileName:$props.fileName}) SET d += $props"
             logging.info("Update source node properties")
             self.graph.query(query,param)
         except Exception as e:
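
Note that `SET d += $props` merges the parameter map into the node, updating only the keys present in `$props` and leaving other properties intact; `SET d = $props` would instead replace every property on the node. A hedged sketch of the merge behaviour, reusing the `graph` handle from the first sketch:

```python
# Only fileSize and status are written; other Document properties survive.
props = {"fileName": "report.pdf", "fileSize": 1024, "status": "Processing"}
graph.query(
    "MERGE (d:Document {fileName: $props.fileName}) SET d += $props",
    {"props": props},
)
```
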
@@ -117,7 +117,7 @@ def get_source_list(self):
            sorting the list by the last updated date.
         """
         logging.info("Get existing files list from graph")
-        query = "MATCH(d:__Document__) WHERE d.fileName IS NOT NULL RETURN d ORDER BY d.updatedAt DESC"
+        query = "MATCH(d:Document) WHERE d.fileName IS NOT NULL RETURN d ORDER BY d.updatedAt DESC"
         result = self.graph.query(query)
         list_of_json_objects = [entry['d'] for entry in result]
         return list_of_json_objects
@@ -131,10 +131,10 @@ def update_KNN_graph(self):
         knn_min_score = os.environ.get('KNN_MIN_SCORE')
         if len(index) > 0:
             logging.info('update KNN graph')
-            self.graph.query("""MATCH (c:__Chunk__)
-                                WHERE c.embedding IS NOT NULL AND count { (c)-[:__SIMILAR__]-() } < 5
+            self.graph.query("""MATCH (c:Chunk)
+                                WHERE c.embedding IS NOT NULL AND count { (c)-[:SIMILAR]-() } < 5
                                 CALL db.index.vector.queryNodes('vector', 6, c.embedding) yield node, score
-                                WHERE node <> c and score >= $score MERGE (c)-[rel:__SIMILAR__]-(node) SET rel.score = score
+                                WHERE node <> c and score >= $score MERGE (c)-[rel:SIMILAR]-(node) SET rel.score = score
                                 """,
                                 {"score":float(knn_min_score)}
                                 )
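
`db.index.vector.queryNodes(indexName, k, vector)` queries the named vector index and yields the `k` nearest nodes with their similarity scores, so this pass links each sparsely connected `Chunk` (fewer than 5 `SIMILAR` edges) to its closest neighbours above the `KNN_MIN_SCORE` threshold. Asking for 6 and filtering `node <> c` leaves up to 5 genuine matches, matching the degree cap in the `WHERE` clause. A standalone lookup sketch, with the chunk id and threshold invented for illustration:

```python
# Hypothetical: fetch the 6 nearest neighbours of one chunk's embedding
# from the 'vector' index and keep those scoring at least 0.8.
rows = graph.query(
    """MATCH (c:Chunk {id: $chunk_id})
       CALL db.index.vector.queryNodes('vector', 6, c.embedding)
       YIELD node, score
       WHERE node <> c AND score >= $score
       RETURN node.id AS neighbour, score""",
    {"chunk_id": "chunk-123", "score": 0.8},
)
```
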
@@ -174,7 +174,7 @@ def execute_query(self, query, param=None):

     def get_current_status_document_node(self, file_name):
         query = """
-                MATCH(d:__Document__ {fileName: $file_name}) RETURN d.status AS Status, d.processingTime AS processingTime,
+                MATCH(d:Document {fileName: $file_name}) RETURN d.status AS Status, d.processingTime AS processingTime,
                 d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount,
                 d.total_pages AS total_pages, d.total_chunks AS total_chunks, d.fileSize as fileSize,
                 d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource
@@ -197,23 +197,23 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me
             logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}')
             delete_uploaded_local_file(merged_file_path,file_name)
         query_to_delete_document = """
-            MATCH (d:__Document__) where d.fileName in $filename_list and d.fileSource in $source_types_list
+            MATCH (d:Document) where d.fileName in $filename_list and d.fileSource in $source_types_list
             with collect(d) as documents
             unwind documents as d
-            optional match (d)<-[:__PART_OF__]-(c:__Chunk__)
+            optional match (d)<-[:PART_OF]-(c:Chunk)
             detach delete c, d
             return count(*) as deletedChunks
             """
         query_to_delete_document_and_entities = """
-            MATCH (d:__Document__) where d.fileName in $filename_list and d.fileSource in $source_types_list
+            MATCH (d:Document) where d.fileName in $filename_list and d.fileSource in $source_types_list
             with collect(d) as documents
             unwind documents as d
-            optional match (d)<-[:__PART_OF__]-(c:__Chunk__)
+            optional match (d)<-[:PART_OF]-(c:Chunk)
             // if delete-entities checkbox is set
             call { with c, documents
-            match (c)-[:__HAS_ENTITY__]->(e)
+            match (c)-[:HAS_ENTITY]->(e)
             // belongs to another document
-            where not exists { (d2)<-[:__PART_OF__]-()-[:__HAS_ENTITY__]->(e) WHERE NOT d2 IN documents }
+            where not exists { (d2)<-[:PART_OF]-()-[:HAS_ENTITY]->(e) WHERE NOT d2 IN documents }
             detach delete e
             return count(*) as entities
             }
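
The `not exists { (d2)<-[:PART_OF]-()-[:HAS_ENTITY]->(e) WHERE NOT d2 IN documents }` guard in the second query is what keeps shared entities alive: an entity is detached and deleted only when no chunk of a surviving document still points at it. A hedged invocation sketch using the parameter names from the queries above (file names and source type invented):

```python
# Hypothetical call: remove two files and, because the delete-entities
# checkbox was set, any entities referenced only by those files.
param = {
    "filename_list": ["a.pdf", "b.pdf"],
    "source_types_list": ["local file"],
}
result = graph.query(query_to_delete_document_and_entities, param)
```
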
@@ -231,17 +231,17 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me


     def list_unconnected_nodes(self):
         query = """
-        MATCH (e:!__Chunk__&!__Document__)
-        WHERE NOT exists { (e)--(:!__Chunk__&!__Document__) }
-        OPTIONAL MATCH (doc:__Document__)<-[:__PART_OF__]-(c:__Chunk__)-[:__HAS_ENTITY__]->(e)
+        MATCH (e:!Chunk&!Document)
+        WHERE NOT exists { (e)--(:!Chunk&!Document) }
+        OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(e)
         RETURN e {.*, embedding:null, elementId:elementId(e), labels:labels(e)} as e,
                collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections
         ORDER BY e.id ASC
         LIMIT 100
         """
         query_total_nodes = """
-        MATCH (e:!__Chunk__&!__Document__)
-        WHERE NOT exists { (e)--(:!__Chunk__&!__Document__) }
+        MATCH (e:!Chunk&!Document)
+        WHERE NOT exists { (e)--(:!Chunk&!Document) }
         RETURN count(*) as total
         """
         nodes_list = self.execute_query(query)
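
`!Chunk&!Document` is a Neo4j 5 label expression: `!` negates and `&` conjoins, so the pattern matches nodes carrying neither label, i.e. the extracted entities. A small companion sketch under the same assumptions:

```python
# Sketch: the complement of the query above -- count entity nodes that
# DO have at least one relationship to another entity node.
connected = graph.query(
    """MATCH (e:!Chunk&!Document)
       WHERE exists { (e)--(:!Chunk&!Document) }
       RETURN count(*) AS total"""
)
```
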
@@ -261,7 +261,7 @@ def get_duplicate_nodes_list(self):
         score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE'))
         text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE'))
         query_duplicate_nodes = """
-                MATCH (n:!__Chunk__&!__Document__) with n
+                MATCH (n:!Chunk&!Document) with n
                 WHERE n.embedding is not null and n.id is not null // and size(n.id) > 3
                 WITH n ORDER BY count {{ (n)--() }} DESC, size(n.id) DESC // updated
                 WITH collect(n) as nodes
@@ -289,7 +289,7 @@ def get_duplicate_nodes_list(self):
                 where none(other in all where other <> nodes and size(other) > size(nodes) and size(apoc.coll.subtract(nodes, other))=0)
                 return head(nodes) as n, tail(nodes) as similar
             }}
-            OPTIONAL MATCH (doc:__Document__)<-[:__PART_OF__]-(c:__Chunk__)-[:__HAS_ENTITY__]->(n)
+            OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(n)
             {return_statement}
             """
         return_query_duplicate_nodes = """
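
The doubled braces (`{{`, `}}`) indicate this query is a `str.format` template rather than a plain string: Cypher's own braces are escaped so that only the `{return_statement}` placeholder is substituted before execution. A minimal sketch of that pattern:

```python
# Sketch of the apparent str.format templating: doubled braces stay
# literal Cypher braces, single-braced names get substituted.
template = """MATCH (n:!Chunk&!Document)
WITH n ORDER BY count {{ (n)--() }} DESC
{return_statement}"""
query = template.format(return_statement="RETURN n LIMIT 25")
```
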
@@ -335,7 +335,7 @@ def drop_create_vector_index(self, is_vector_index_recreate):
         if is_vector_index_recreate == 'true':
             self.graph.query("""drop index vector""")

-        self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:__Chunk__) on (c.embedding)
+        self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding)
                         OPTIONS {indexConfig: {
                         `vector.dimensions`: $dimensions,
                         `vector.similarity_function`: 'cosine'
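
`vector.dimensions` must match the embedding model's output size or writes to the index will fail; similarity here is cosine. A hedged recreation sketch, mirroring the source's use of a `$dimensions` parameter, with a dimension invented for illustration (e.g. 384 for a MiniLM-style embedder):

```python
# Hypothetical: recreate the chunk embedding index for 384-d vectors.
# The dimension must equal whatever the embedding model emits.
graph.query(
    """CREATE VECTOR INDEX `vector` IF NOT EXISTS FOR (c:Chunk) ON (c.embedding)
       OPTIONS {indexConfig: {
           `vector.dimensions`: $dimensions,
           `vector.similarity_function`: 'cosine'}}""",
    {"dimensions": 384},
)
```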