@@ -20,7 +20,7 @@ def update_exception_db(self, file_name, exp_msg):
            is_cancelled_status = result[0]['is_cancelled']
            if is_cancelled_status == 'True':
                job_status = 'Cancelled'
-            self.graph.query("""MERGE(d:__Document__ {fileName: $fName}) SET d.status = $status, d.errorMessage = $error_msg""",
+            self.graph.query("""MERGE(d:Document {fileName: $fName}) SET d.status = $status, d.errorMessage = $error_msg""",
                             {"fName": file_name, "status": job_status, "error_msg": exp_msg})
        except Exception as e:
            error_message = str(e)
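
The MERGE-then-SET pattern in this hunk makes the status update idempotent: the Document node is created if absent, and its status and errorMessage are overwritten either way. A minimal sketch of the same upsert against the official neo4j driver (the URI, credentials, and "Failed" status value are assumptions, not part of this change):

```python
# Sketch only: replicates the upsert above with the plain neo4j driver.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

def mark_document_failed(file_name: str, exp_msg: str) -> None:
    # MERGE creates the node if it is missing; SET then overwrites the two
    # properties, so repeated calls are safe.
    with driver.session() as session:
        session.run(
            "MERGE (d:Document {fileName: $fName}) "
            "SET d.status = $status, d.errorMessage = $error_msg",
            fName=file_name, status="Failed", error_msg=exp_msg,
        )
```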
@@ -31,7 +31,7 @@ def create_source_node(self, obj_source_node:sourceNode):
        try:
            job_status = "New"
            logging.info("creating source node if does not exist")
-            self.graph.query("""MERGE(d:__Document__ {fileName: $fn}) SET d.fileSize = $fs, d.fileType = $ft,
+            self.graph.query("""MERGE(d:Document {fileName: $fn}) SET d.fileSize = $fs, d.fileType = $ft,
                            d.status = $st, d.url = $url, d.awsAccessKeyId = $awsacc_key_id,
                            d.fileSource = $f_source, d.createdAt = $c_at, d.updatedAt = $u_at,
                            d.processingTime = $pt, d.errorMessage = $e_message, d.nodeCount = $n_count,
@@ -92,7 +92,7 @@ def update_source_node(self, obj_source_node:sourceNode):
            param = {"props": params}

            print(f'Base Param value 1 : {param}')
-            query = "MERGE(d:__Document__ {fileName: $props.fileName}) SET d += $props"
+            query = "MERGE(d:Document {fileName: $props.fileName}) SET d += $props"
            logging.info("Update source node properties")
            self.graph.query(query, param)
        except Exception as e:
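
The `SET d += $props` idiom next to this rename is worth calling out: `+=` merges the parameter map into the node, writing only the keys present in `$props` and leaving every other property intact, so the method never has to enumerate columns. A sketch, assuming the same `graph.query(cypher, params)` wrapper used throughout this class (the property values are illustrative):

```python
# Illustrative only: property names/values are made up; `graph` is assumed to
# expose the .query(cypher, params) method used elsewhere in this file.
params = {
    "fileName": "report.pdf",
    "status": "Processing",
    "updatedAt": "2024-01-01T00:00:00",
}
# `SET d += $props` merges the map: keys in params are written, all other
# properties on the node are left untouched.
graph.query(
    "MERGE (d:Document {fileName: $props.fileName}) SET d += $props",
    {"props": params},
)
```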
@@ -114,7 +114,7 @@ def get_source_list(self):
        sorting the list by the last updated date.
        """
        logging.info("Get existing files list from graph")
-        query = "MATCH(d:__Document__) WHERE d.fileName IS NOT NULL RETURN d ORDER BY d.updatedAt DESC"
+        query = "MATCH(d:Document) WHERE d.fileName IS NOT NULL RETURN d ORDER BY d.updatedAt DESC"
        result = self.graph.query(query)
        list_of_json_objects = [entry['d'] for entry in result]
        return list_of_json_objects
@@ -128,10 +128,10 @@ def update_KNN_graph(self):
        knn_min_score = os.environ.get('KNN_MIN_SCORE')
        if len(index) > 0:
            logging.info('update KNN graph')
-            self.graph.query("""MATCH (c:__Chunk__)
-                                WHERE c.embedding IS NOT NULL AND count { (c)-[:__SIMILAR__]-() } < 5
+            self.graph.query("""MATCH (c:Chunk)
+                                WHERE c.embedding IS NOT NULL AND count { (c)-[:SIMILAR]-() } < 5
                                CALL db.index.vector.queryNodes('vector', 6, c.embedding) yield node, score
-                                WHERE node <> c and score >= $score MERGE (c)-[rel:__SIMILAR__]-(node) SET rel.score = score
+                                WHERE node <> c and score >= $score MERGE (c)-[rel:SIMILAR]-(node) SET rel.score = score
                             """,
                             {"score": float(knn_min_score)}
                             )
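
This query asks the vector index for the 6 nearest neighbours of each chunk (the chunk itself usually comes back, hence `node <> c`) and only adds SIMILAR edges to chunks that currently have fewer than 5. One fragility beside the hunk: `os.environ.get('KNN_MIN_SCORE')` returns `None` when the variable is unset, and the later `float(knn_min_score)` then raises `TypeError`. A hedged one-line guard (the 0.8 default is an assumption, not taken from this repo):

```python
import os

# Fall back to a default when KNN_MIN_SCORE is unset; float(None) would
# otherwise raise TypeError. The 0.8 default is an assumption.
knn_min_score = float(os.environ.get('KNN_MIN_SCORE', '0.8'))
```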
@@ -171,7 +171,7 @@ def execute_query(self, query, param=None):

    def get_current_status_document_node(self, file_name):
        query = """
-                MATCH(d:__Document__ {fileName: $file_name}) RETURN d.status AS Status, d.processingTime AS processingTime,
+                MATCH(d:Document {fileName: $file_name}) RETURN d.status AS Status, d.processingTime AS processingTime,
                d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount,
                d.total_pages AS total_pages, d.total_chunks AS total_chunks, d.fileSize as fileSize,
                d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk
@@ -194,23 +194,23 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me
                logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}')
                delete_uploaded_local_file(merged_file_path, file_name)
        query_to_delete_document = """
-            MATCH (d:__Document__) where d.fileName in $filename_list and d.fileSource in $source_types_list
+            MATCH (d:Document) where d.fileName in $filename_list and d.fileSource in $source_types_list
            with collect(d) as documents
            unwind documents as d
-            optional match (d)<-[:__PART_OF__]-(c:__Chunk__)
+            optional match (d)<-[:PART_OF]-(c:Chunk)
            detach delete c, d
            return count(*) as deletedChunks
            """
        query_to_delete_document_and_entities = """
-            MATCH (d:__Document__) where d.fileName in $filename_list and d.fileSource in $source_types_list
+            MATCH (d:Document) where d.fileName in $filename_list and d.fileSource in $source_types_list
            with collect(d) as documents
            unwind documents as d
-            optional match (d)<-[:__PART_OF__]-(c:__Chunk__)
+            optional match (d)<-[:PART_OF]-(c:Chunk)
            // if delete-entities checkbox is set
            call { with c, documents
-                match (c)-[:__HAS_ENTITY__]->(e)
+                match (c)-[:HAS_ENTITY]->(e)
                // belongs to another document
-                where not exists { (d2)<-[:__PART_OF__]-()-[:__HAS_ENTITY__]->(e) WHERE NOT d2 IN documents }
+                where not exists { (d2)<-[:PART_OF]-()-[:HAS_ENTITY]->(e) WHERE NOT d2 IN documents }
                detach delete e
                return count(*) as entities
            }
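
The `where not exists { ... }` guard in this hunk is the crux of entity cleanup: an entity is deleted only when no chunk of a document *outside* the deletion set still points at it. A sketch of the inverse check in isolation, counting the entities that would survive because another document shares them (same renamed labels; assumes the `graph.query` wrapper from this file):

```python
# Sketch: count entities referenced by the files being deleted that are ALSO
# referenced by some other document, and must therefore be kept.
shared_entities_query = """
MATCH (d:Document) WHERE d.fileName IN $filename_list
MATCH (d)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e)
WHERE exists {
    (d2:Document)<-[:PART_OF]-(:Chunk)-[:HAS_ENTITY]->(e)
    WHERE NOT d2.fileName IN $filename_list
}
RETURN count(DISTINCT e) AS sharedEntities
"""
result = graph.query(shared_entities_query, {"filename_list": ["report.pdf"]})
```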
@@ -228,17 +228,17 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me

    def list_unconnected_nodes(self):
        query = """
-        MATCH (e:!__Chunk__&!__Document__)
-        WHERE NOT exists { (e)--(:!__Chunk__&!__Document__) }
-        OPTIONAL MATCH (doc:__Document__)<-[:__PART_OF__]-(c:__Chunk__)-[:__HAS_ENTITY__]->(e)
+        MATCH (e:!Chunk&!Document)
+        WHERE NOT exists { (e)--(:!Chunk&!Document) }
+        OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(e)
        RETURN e {.*, embedding:null, elementId:elementId(e), labels:labels(e)} as e,
        collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections
        ORDER BY e.id ASC
        LIMIT 100
        """
        query_total_nodes = """
-        MATCH (e:!__Chunk__&!__Document__)
-        WHERE NOT exists { (e)--(:!__Chunk__&!__Document__) }
+        MATCH (e:!Chunk&!Document)
+        WHERE NOT exists { (e)--(:!Chunk&!Document) }
        RETURN count(*) as total
        """
        nodes_list = self.execute_query(query)
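
`:!Chunk&!Document` is Neo4j 5 label-expression syntax: match nodes carrying neither label, i.e. the extracted entities. Since the listing query hard-codes `LIMIT 100`, a paged variant could look like the sketch below (the `$skip`/`$limit` parameters are an addition for illustration, not part of this diff):

```python
# Sketch: page through unconnected entities instead of capping at 100.
# The skip/limit parameters are assumptions; the diff hard-codes LIMIT 100.
paged_query = """
MATCH (e:!Chunk&!Document)
WHERE NOT exists { (e)--(:!Chunk&!Document) }
RETURN e.id AS id, labels(e) AS labels
ORDER BY e.id ASC
SKIP $skip LIMIT $limit
"""
page = graph.query(paged_query, {"skip": 0, "limit": 100})
```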
@@ -258,7 +258,7 @@ def get_duplicate_nodes_list(self):
        score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE'))
        text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE'))
        query_duplicate_nodes = """
-                MATCH (n:!__Chunk__&!__Document__) with n
+                MATCH (n:!Chunk&!Document) with n
                WHERE n.embedding is not null and n.id is not null // and size(n.id) > 3
                WITH n ORDER BY count {{ (n)--() }} DESC, size(n.id) DESC // updated
                WITH collect(n) as nodes
@@ -286,7 +286,7 @@ def get_duplicate_nodes_list(self):
                    where none(other in all where other <> nodes and size(other) > size(nodes) and size(apoc.coll.subtract(nodes, other))=0)
                    return head(nodes) as n, tail(nodes) as similar
                }}
-                OPTIONAL MATCH (doc:__Document__)<-[:__PART_OF__]-(c:__Chunk__)-[:__HAS_ENTITY__]->(n)
+                OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(n)
                {return_statement}
                """
        return_query_duplicate_nodes = """
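
The doubled braces in these two hunks (`count {{ (n)--() }}` and the closing `}}`) are not Cypher: the query string is later run through `str.format` so that `{return_statement}` can be spliced in, which forces literal Cypher braces to be escaped. A minimal demonstration:

```python
# Why the braces above are doubled: str.format treats single braces as
# placeholders, so literal Cypher braces must be written as {{ and }}.
template = "WITH n ORDER BY count {{ (n)--() }} DESC\n{return_statement}"
cypher = template.format(return_statement="RETURN n LIMIT 10")
print(cypher)
# WITH n ORDER BY count { (n)--() } DESC
# RETURN n LIMIT 10
```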
@@ -332,7 +332,7 @@ def drop_create_vector_index(self, is_vector_index_recreate):
            if is_vector_index_recreate == 'true':
                self.graph.query("""drop index vector""")

-            self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:__Chunk__) on (c.embedding)
+            self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding)
                            OPTIONS {indexConfig: {
                                `vector.dimensions`: $dimensions,
                                `vector.similarity_function`: 'cosine'
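
For context, the recreate path drops and rebuilds the `vector` index on the renamed `Chunk` label. A condensed sketch of the same call with the dimension sourced from the environment (the `EMBEDDING_DIMENSION` variable name and 384 default are assumptions, and `graph` is again the wrapper used throughout this file):

```python
import os

# Sketch: rebuild the chunk vector index; the env-var name and its default
# are assumed, not taken from this diff.
dimensions = int(os.environ.get("EMBEDDING_DIMENSION", "384"))
graph.query(
    """CREATE VECTOR INDEX `vector` IF NOT EXISTS FOR (c:Chunk) ON (c.embedding)
       OPTIONS {indexConfig: {
           `vector.dimensions`: $dimensions,
           `vector.similarity_function`: 'cosine'
       }}""",
    {"dimensions": dimensions},
)
```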