@@ -208,7 +208,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
208
208
lst_file_name .append ({'fileName' :obj_source_node .file_name ,'fileSize' :obj_source_node .file_size ,'url' :obj_source_node .url , 'language' :obj_source_node .language , 'status' :'Success' })
209
209
return lst_file_name ,success_count ,failed_count
210
210
211
- def extract_graph_from_file_local_file (uri , userName , password , database , model , merged_file_path , fileName , allowedNodes , allowedRelationship , retry_condition ):
211
+ async def extract_graph_from_file_local_file (uri , userName , password , database , model , merged_file_path , fileName , allowedNodes , allowedRelationship , retry_condition ):
212
212
213
213
logging .info (f'Process file name :{ fileName } ' )
214
214
if retry_condition is None :
@@ -220,11 +220,11 @@ def extract_graph_from_file_local_file(uri, userName, password, database, model,
220
220
file_name , pages , file_extension = get_documents_from_file_by_path (merged_file_path ,fileName )
221
221
if pages == None or len (pages )== 0 :
222
222
raise Exception (f'File content is not available for file : { file_name } ' )
223
- return processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship , True , merged_file_path )
223
+ return await processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship , True , merged_file_path )
224
224
else :
225
- return processing_source (uri , userName , password , database , model , fileName , [], allowedNodes , allowedRelationship , True , merged_file_path , retry_condition )
225
+ return await processing_source (uri , userName , password , database , model , fileName , [], allowedNodes , allowedRelationship , True , merged_file_path , retry_condition )
226
226
227
- def extract_graph_from_file_s3 (uri , userName , password , database , model , source_url , aws_access_key_id , aws_secret_access_key , file_name , allowedNodes , allowedRelationship , retry_condition ):
227
+ async def extract_graph_from_file_s3 (uri , userName , password , database , model , source_url , aws_access_key_id , aws_secret_access_key , file_name , allowedNodes , allowedRelationship , retry_condition ):
228
228
if retry_condition is None :
229
229
if (aws_access_key_id == None or aws_secret_access_key == None ):
230
230
raise Exception ('Please provide AWS access and secret keys' )
@@ -234,49 +234,49 @@ def extract_graph_from_file_s3(uri, userName, password, database, model, source_
234
234
235
235
if pages == None or len (pages )== 0 :
236
236
raise Exception (f'File content is not available for file : { file_name } ' )
237
- return processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship )
237
+ return await processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship )
238
238
else :
239
- return processing_source (uri , userName , password , database , model , file_name , [], allowedNodes , allowedRelationship , retry_condition = retry_condition )
239
+ return await processing_source (uri , userName , password , database , model , file_name , [], allowedNodes , allowedRelationship , retry_condition = retry_condition )
240
240
241
async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition):
    """Extract a knowledge graph from the content of a web page.

    On a fresh run (``retry_condition is None``) the page content is fetched
    from ``source_url`` and handed to ``processing_source``. On a retry the
    previously stored content is reused, so an empty page list is passed
    along with ``retry_condition``.

    Returns:
        The awaited result of ``processing_source``.

    Raises:
        Exception: if no content could be fetched for the given URL.
    """
    if retry_condition is None:
        # The fetch may normalise the display name, so file_name is rebound.
        file_name, pages = get_documents_from_web_page(source_url)
        # `not pages` covers both None and an empty list (was `pages == None or len(pages) == 0`).
        if not pages:
            raise Exception(f'Content is not available for given URL : {file_name}')
        return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)
    else:
        return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition)
251
async def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition):
    """Extract a knowledge graph from a YouTube video transcript.

    On a fresh run (``retry_condition is None``) the transcript is fetched
    from ``source_url`` and handed to ``processing_source``. On a retry the
    previously stored content is reused, so an empty page list is passed
    along with ``retry_condition``.

    Returns:
        The awaited result of ``processing_source``.

    Raises:
        Exception: if no transcript is available for the video.
    """
    if retry_condition is None:
        # The fetch may normalise the display name, so file_name is rebound.
        file_name, pages = get_documents_from_youtube(source_url)
        # `not pages` covers both None and an empty list (was `pages == None or len(pages) == 0`).
        if not pages:
            raise Exception(f'Youtube transcript is not available for file : {file_name}')
        return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)
    else:
        return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition)
261
async def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition):
    """Extract a knowledge graph from a Wikipedia page.

    On a fresh run (``retry_condition is None``) the page is fetched for
    ``wiki_query`` in the given ``language`` and handed to
    ``processing_source``. On a retry the previously stored content is
    reused, so an empty page list is passed along with ``retry_condition``.

    Returns:
        The awaited result of ``processing_source``.

    Raises:
        Exception: if the Wikipedia page content is not available.
    """
    if retry_condition is None:
        # The fetch may normalise the display name, so file_name is rebound.
        file_name, pages = get_documents_from_Wikipedia(wiki_query, language)
        # `not pages` covers both None and an empty list (was `pages == None or len(pages) == 0`).
        if not pages:
            raise Exception(f'Wikipedia page is not available for file : {file_name}')
        return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)
    else:
        return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition)
270
async def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition):
    """Extract a knowledge graph from a file stored in a GCS bucket.

    On a fresh run (``retry_condition is None``) the blob content is fetched
    via ``get_documents_from_gcs`` and handed to ``processing_source``. On a
    retry the previously stored content is reused, so an empty page list is
    passed along with ``retry_condition``.

    Returns:
        The awaited result of ``processing_source``.

    Raises:
        Exception: if the blob content is not available.
    """
    if retry_condition is None:
        # The fetch may normalise the display name, so file_name is rebound.
        file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token)
        # `not pages` covers both None and an empty list (was `pages == None or len(pages) == 0`).
        if not pages:
            raise Exception(f'File content is not available for file : {file_name}')
        return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)
    else:
        return await processing_source(uri, userName, password, database, model, file_name, [], allowedNodes, allowedRelationship, retry_condition=retry_condition)
279
- def processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship , is_uploaded_from_local = None , merged_file_path = None , retry_condition = None ):
279
+ async def processing_source (uri , userName , password , database , model , file_name , pages , allowedNodes , allowedRelationship , is_uploaded_from_local = None , merged_file_path = None , retry_condition = None ):
280
280
"""
281
281
Extracts a Neo4jGraph from a PDF file based on the model.
282
282
@@ -366,7 +366,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages
366
366
break
367
367
else :
368
368
processing_chunks_start_time = time .time ()
369
- node_count ,rel_count ,latency_processed_chunk = processing_chunks (selected_chunks ,graph ,uri , userName , password , database ,file_name ,model ,allowedNodes ,allowedRelationship ,node_count , rel_count )
369
+ node_count ,rel_count ,latency_processed_chunk = await processing_chunks (selected_chunks ,graph ,uri , userName , password , database ,file_name ,model ,allowedNodes ,allowedRelationship ,node_count , rel_count )
370
370
processing_chunks_end_time = time .time ()
371
371
processing_chunks_elapsed_end_time = processing_chunks_end_time - processing_chunks_start_time
372
372
logging .info (f"Time taken { update_graph_chunk_processed } chunks processed upto { select_chunks_upto } completed in { processing_chunks_elapsed_end_time :.2f} seconds for file name { file_name } " )
@@ -439,7 +439,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages
439
439
logging .error (error_message )
440
440
raise Exception (error_message )
441
441
442
- def processing_chunks (chunkId_chunkDoc_list ,graph ,uri , userName , password , database ,file_name ,model ,allowedNodes ,allowedRelationship , node_count , rel_count ):
442
+ async def processing_chunks (chunkId_chunkDoc_list ,graph ,uri , userName , password , database ,file_name ,model ,allowedNodes ,allowedRelationship , node_count , rel_count ):
443
443
#create vector index and update chunk node with embedding
444
444
if graph is not None :
445
445
if graph ._driver ._closed :
@@ -456,7 +456,7 @@ def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, datab
456
456
logging .info ("Get graph document list from models" )
457
457
458
458
start_entity_extraction = time .time ()
459
- graph_documents = get_graph_from_llm (model , chunkId_chunkDoc_list , allowedNodes , allowedRelationship )
459
+ graph_documents = await get_graph_from_llm (model , chunkId_chunkDoc_list , allowedNodes , allowedRelationship )
460
460
end_entity_extraction = time .time ()
461
461
elapsed_entity_extraction = end_entity_extraction - start_entity_extraction
462
462
logging .info (f'Time taken to extract enitities from LLM Graph Builder: { elapsed_entity_extraction :.2f} seconds' )
0 commit comments