@@ -518,26 +518,30 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition):
     else:
         chunkId_chunkDoc_list = []
         chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename": file_name})
-        for chunk in chunks:
-            chunk_doc = Document(page_content=chunk['text'], metadata={'id': chunk['id'], 'position': chunk['position']})
-            chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc})

-        if retry_condition == START_FROM_LAST_PROCESSED_POSITION:
-            logging.info(f"Retry : start_from_last_processed_position")
-            starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename": file_name})
-            if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list):
-                return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
+        if chunks[0]['text'] is None or chunks[0]['text'] == "":
+            raise Exception(f"Chunks are not created for {file_name}. Please re-upload the file and try again.")
+        else:
+            for chunk in chunks:
+                chunk_doc = Document(page_content=chunk['text'], metadata={'id': chunk['id'], 'position': chunk['position']})
+                chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc})

-        elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list):
-            starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename": file_name})
-            return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
+        if retry_condition == START_FROM_LAST_PROCESSED_POSITION:
+            logging.info(f"Retry : start_from_last_processed_position")
+            starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename": file_name})
+            if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list):
+                return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
+
+            elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list):
+                starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename": file_name})
+                return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
+
+            else:
+                raise Exception(f"All chunks of {file_name} are already processed. If you want to re-process, please start from the beginning.")

         else:
-            raise Exception(f"All chunks of {file_name} are alreday processed. If you want to re-process, Please start from begnning")
-
-        else:
-            logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}")
-            return len(chunks), chunkId_chunkDoc_list
+            logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}")
+            return len(chunks), chunkId_chunkDoc_list

 def get_source_list_from_graph(uri, userName, password, db_name=None):
     """