Skip to content

Error & warning handling #938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/src/entities/source_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ class sourceNode:
gcsBucketFolder:str=None
gcsProjectId:str=None
awsAccessKeyId:str=None
chunkNodeCount:int=None
chunkRelCount:int=None
entityNodeCount:int=None
entityEntityRelCount:int=None
communityNodeCount:int=None
communityRelCount:int=None
node_count:int=None
relationship_count:str=None
model:str=None
Expand Down
107 changes: 59 additions & 48 deletions backend/src/graphDB_dataAccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,24 @@ def create_source_node(self, obj_source_node:sourceNode):
d.relationshipCount = $r_count, d.model= $model, d.gcsBucket=$gcs_bucket,
d.gcsBucketFolder= $gcs_bucket_folder, d.language= $language,d.gcsProjectId= $gcs_project_id,
d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0,
d.access_token=$access_token""",
d.access_token=$access_token,
d.chunkNodeCount=$chunkNodeCount,d.chunkRelCount=$chunkRelCount,
d.entityNodeCount=$entityNodeCount,d.entityEntityRelCount=$entityEntityRelCount,
d.communityNodeCount=$communityNodeCount,d.communityRelCount=$communityRelCount""",
{"fn":obj_source_node.file_name, "fs":obj_source_node.file_size, "ft":obj_source_node.file_type, "st":job_status,
"url":obj_source_node.url,
"awsacc_key_id":obj_source_node.awsAccessKeyId, "f_source":obj_source_node.file_source, "c_at":obj_source_node.created_at,
"u_at":obj_source_node.created_at, "pt":0, "e_message":'', "n_count":0, "r_count":0, "model":obj_source_node.model,
"gcs_bucket": obj_source_node.gcsBucket, "gcs_bucket_folder": obj_source_node.gcsBucketFolder,
"language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId,
"access_token":obj_source_node.access_token})
"access_token":obj_source_node.access_token,
"chunkNodeCount":obj_source_node.chunkNodeCount,
"chunkRelCount":obj_source_node.chunkRelCount,
"entityNodeCount":obj_source_node.entityNodeCount,
"entityEntityRelCount":obj_source_node.entityEntityRelCount,
"communityNodeCount":obj_source_node.communityNodeCount,
"communityRelCount":obj_source_node.communityRelCount
})
except Exception as e:
error_message = str(e)
logging.info(f"error_message = {error_message}")
Expand Down Expand Up @@ -463,51 +473,52 @@ def update_node_relationship_count(self,document_name):
param = {"document_name": document_name}
result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param)
response = {}
for record in result:
filename = record["filename"]
chunkNodeCount = record["chunkNodeCount"]
chunkRelCount = record["chunkRelCount"]
entityNodeCount = record["entityNodeCount"]
entityEntityRelCount = record["entityEntityRelCount"]
if (not document_name) and (community_flag):
communityNodeCount = record["communityNodeCount"]
communityRelCount = record["communityRelCount"]
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}
if result:
for record in result:
filename = record.get("filename",None)
chunkNodeCount = int(record.get("chunkNodeCount",0))
chunkRelCount = int(record.get("chunkRelCount",0))
entityNodeCount = int(record.get("entityNodeCount",0))
entityEntityRelCount = int(record.get("entityEntityRelCount",0))
if (not document_name) and (community_flag):
communityNodeCount = int(record.get("communityNodeCount",0))
communityRelCount = int(record.get("communityRelCount",0))
else:
communityNodeCount = 0
communityRelCount = 0
nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
update_query = """
MATCH (d:Document {fileName: $filename})
SET d.chunkNodeCount = $chunkNodeCount,
d.chunkRelCount = $chunkRelCount,
d.entityNodeCount = $entityNodeCount,
d.entityEntityRelCount = $entityEntityRelCount,
d.communityNodeCount = $communityNodeCount,
d.communityRelCount = $communityRelCount,
d.nodeCount = $nodeCount,
d.relationshipCount = $relationshipCount
"""
self.execute_query(update_query,{
"filename": filename,
"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
})

response[filename] = {"chunkNodeCount": chunkNodeCount,
"chunkRelCount": chunkRelCount,
"entityNodeCount": entityNodeCount,
"entityEntityRelCount": entityEntityRelCount,
"communityNodeCount": communityNodeCount,
"communityRelCount": communityRelCount,
"nodeCount" : nodeCount,
"relationshipCount" : relationshipCount
}

return response
4 changes: 3 additions & 1 deletion backend/src/graph_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ def get_graph_results(uri, username, password,database,document_names):

def get_chunktext_results(uri, username, password, database, document_name, page_no):
"""Retrieves chunk text, position, and page number from graph data with pagination."""
driver = None
try:
logging.info("Starting chunk text query process")
offset = 10
Expand Down Expand Up @@ -254,4 +255,5 @@ def get_chunktext_results(uri, username, password, database, document_name, page
logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
finally:
driver.close()
if driver:
driver.close()
37 changes: 36 additions & 1 deletion backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ def create_source_node_graph_url_s3(graph, model, source_url, aws_access_key_id,
obj_source_node.url = str(source_url+file_name)
obj_source_node.awsAccessKeyId = aws_access_key_id
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
try:
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
Expand Down Expand Up @@ -88,6 +94,12 @@ def create_source_node_graph_url_gcs(graph, model, gcs_project_id, gcs_bucket_na
obj_source_node.gcsProjectId = file_metadata['gcsProjectId']
obj_source_node.created_at = datetime.now()
obj_source_node.access_token = credentials.token
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0

try:
graphDb_data_Access = graphDBdataAccess(graph)
Expand Down Expand Up @@ -119,7 +131,12 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type):
obj_source_node.file_name = pages[0].metadata['title']
obj_source_node.language = pages[0].metadata['language']
obj_source_node.file_size = sys.getsizeof(pages[0].page_content)

obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url,'status':'Success'})
Expand All @@ -138,6 +155,12 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type):
obj_source_node.model = model
obj_source_node.url = youtube_url
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url)
logging.info(f"match value: {match}")
video_id = parse_qs(urlparse(youtube_url).query).get('v')
Expand Down Expand Up @@ -180,6 +203,12 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
obj_source_node.url = urllib.parse.unquote(pages[0].metadata['source'])
obj_source_node.created_at = datetime.now()
obj_source_node.language = language
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)
graphDb_data_Access.create_source_node(obj_source_node)
success_count+=1
Expand Down Expand Up @@ -618,6 +647,12 @@ def upload_file(graph, model, chunk, chunk_number:int, total_chunks:int, origina
obj_source_node.file_source = 'local file'
obj_source_node.model = model
obj_source_node.created_at = datetime.now()
obj_source_node.chunkNodeCount=0
obj_source_node.chunkRelCount=0
obj_source_node.entityNodeCount=0
obj_source_node.entityEntityRelCount=0
obj_source_node.communityNodeCount=0
obj_source_node.communityRelCount=0
graphDb_data_Access = graphDBdataAccess(graph)

graphDb_data_Access.create_source_node(obj_source_node)
Expand Down
Loading