Skip to content

Error & warning handling #938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/src/entities/source_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ class sourceNode:
gcsBucketFolder:str=None
gcsProjectId:str=None
awsAccessKeyId:str=None
chunkNodeCount:int=None
chunkRelCount:int=None
entityNodeCount:int=None
entityEntityRelCount:int=None
communityNodeCount:int=None
communityRelCount:int=None
node_count:int=None
relationship_count:str=None
model:str=None
Expand Down
107 changes: 59 additions & 48 deletions backend/src/graphDB_dataAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,24 @@ def create_source_node(self, obj_source_node:sourceNode):
d.relationshipCount = $r_count, d.model= $model, d.gcsBucket=$gcs_bucket,
d.gcsBucketFolder= $gcs_bucket_folder, d.language= $language,d.gcsProjectId= $gcs_project_id,
d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0,
d.access_token=$access_token""",
d.access_token=$access_token,
d.chunkNodeCount=$chunkNodeCount,d.chunkRelCount=$chunkRelCount,
d.entityNodeCount=$entityNodeCount,d.entityEntityRelCount=$entityEntityRelCount,
d.communityNodeCount=$communityNodeCount,d.communityRelCount=$communityRelCount""",
{"fn":obj_source_node.file_name, "fs":obj_source_node.file_size, "ft":obj_source_node.file_type, "st":job_status,
"url":obj_source_node.url,
"awsacc_key_id":obj_source_node.awsAccessKeyId, "f_source":obj_source_node.file_source, "c_at":obj_source_node.created_at,
"u_at":obj_source_node.created_at, "pt":0, "e_message":'', "n_count":0, "r_count":0, "model":obj_source_node.model,
"gcs_bucket": obj_source_node.gcsBucket, "gcs_bucket_folder": obj_source_node.gcsBucketFolder,
"language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId,
"access_token":obj_source_node.access_token})
"access_token":obj_source_node.access_token,
"chunkNodeCount":obj_source_node.chunkNodeCount,
"chunkRelCount":obj_source_node.chunkRelCount,
"entityNodeCount":obj_source_node.entityNodeCount,
"entityEntityRelCount":obj_source_node.entityEntityRelCount,
"communityNodeCount":obj_source_node.communityNodeCount,
"communityRelCount":obj_source_node.communityRelCount
})
except Exception as e:
error_message = str(e)
logging.info(f"error_message = {error_message}")
Expand Down Expand Up @@ -463,51 +473,52 @@ def update_node_relationship_count(self,document_name):
param = {"document_name": document_name}
result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param)
response = {}
for record in result:
filename = record["filename"]
chunkNodeCount = record["chunkNodeCount"]
chunkRelCount = record["chunkRelCount"]
entityNodeCount = record["entityNodeCount"]
entityEntityRelCount = record["entityEntityRelCount"]
if (not document_name) and (community_flag):
communityNodeCount = record["communityNodeCount"]
communityRelCount = record["communityRelCount"]
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}
if result:
for record in result:
filename = record.get("filename",None)
chunkNodeCount = int(record.get("chunkNodeCount",0))
chunkRelCount = int(record.get("chunkRelCount",0))
entityNodeCount = int(record.get("entityNodeCount",0))
entityEntityRelCount = int(record.get("entityEntityRelCount",0))
if (not document_name) and (community_flag):
communityNodeCount = int(record.get("communityNodeCount",0))
communityRelCount = int(record.get("communityRelCount",0))
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}

return response
4 changes: 3 additions & 1 deletion backend/src/graph_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def get_graph_results(uri, username, password,database,document_names):

def get_chunktext_results(uri, username, password, database, document_name, page_no):
"""Retrieves chunk text, position, and page number from graph data with pagination."""
driver = None
try:
logging.info("Starting chunk text query process")
offset = 10
Expand Down Expand Up @@ -254,4 +255,5 @@ def get_chunktext_results(uri, username, password, database, document_name, page
logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
finally:
driver.close()
if driver:
driver.close()
37 changes: 36 additions & 1 deletion backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ def create_source_node_graph_url_s3(graph, model, source_url, aws_access_key_id,
obj_source_node.url = str(source_url+file_name)
obj_source_node.awsAccessKeyId = aws_access_key_id
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
try:
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
Expand Down Expand Up @@ -88,6 +94,12 @@ def create_source_node_graph_url_gcs(graph, model, gcs_project_id, gcs_bucket_na
obj_source_node.gcsProjectId = file_metadata['gcsProjectId']
obj_source_node.created_at = datetime.now()
obj_source_node.access_token = credentials.token
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0

try:
graphDb_data_Access = graphDBdataAccess(graph)
Expand Down Expand Up @@ -119,7 +131,12 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type):
obj_source_node.file_name = pages[0].metadata['title']
obj_source_node.language = pages[0].metadata['language']
obj_source_node.file_size = sys.getsizeof(pages[0].page_content)

obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url,'status':'Success'})
Expand All @@ -138,6 +155,12 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type):
obj_source_node.model = model
obj_source_node.url = youtube_url
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url)
logging.info(f"match value: {match}")
video_id = parse_qs(urlparse(youtube_url).query).get('v')
Expand Down Expand Up @@ -180,6 +203,12 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
obj_source_node.url = urllib.parse.unquote(pages[0].metadata['source'])
obj_source_node.created_at = datetime.now()
obj_source_node.language = language
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
success_count+=1
Expand Down Expand Up @@ -618,6 +647,12 @@ def upload_file(graph, model, chunk, chunk_number:int, total_chunks:int, origina
obj_source_node.file_source = 'local file'
obj_source_node.model = model
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)

graphDb_data_Access.create_source_node(obj_source_node)
Expand Down
Loading