Commit 73e6b79

prakriti-solankey, vasanthasaikalluri, kartikpersistent, and abhishekkumar-27 authored
Global search fulltext (#767)
* added global search+vector+fulltext mode
* added community details in chunk entities
* added node ids
* updated vector graph query
* added entities and modified chat response
* added params
* api response changes
* added chunk entity query
* modified query
* payload changes
* added nodedetails properties
* payload new changes
* communities check
* communities selection check
* Communities bug solutions (#770)
* added local chat history
* added write access check
* added write access param
* labels change for nodes
* added fulltext creation
* disabled the write and delete actions for read only user mode
* modified query
* test updates
* test updated
* enable communities
* removed the selected prop
* Read Only User Support (#766)
* added local chat history
* added write access check
* added write access param
* added fulltext creation
* disabled the write and delete actions for read only user mode
* modified query

---------

Co-authored-by: vasanthasaikalluri <[email protected]>

* storing the gds status and write access on refresh
* enable communities label change

---------

Co-authored-by: vasanthasaikalluri <[email protected]>
Co-authored-by: kartikpersistent <[email protected]>
Co-authored-by: abhishekkumar-27 <[email protected]>

* readonly fixed on refresh
* clear chat history
* selectedFiles check for Chatbot
* clear history

---------

Co-authored-by: vasanthasaikalluri <[email protected]>
Co-authored-by: kartikpersistent <[email protected]>
Co-authored-by: abhishekkumar-27 <[email protected]>
1 parent 364fd61 commit 73e6b79

File tree

20 files changed: +709 −392 lines changed


backend/score.py

+4 −5
@@ -271,8 +271,8 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database
         await asyncio.to_thread(create_entity_embedding, graph)
         json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
         logging.info(f'Entity Embeddings created')
-
-    if "create_communities" in tasks:
+
+    if "enable_communities" in tasks:
         model = "openai-gpt-4o"
         await asyncio.to_thread(create_communities, uri, userName, password, database,model)
         josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
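
A hedged sketch of how a client might trigger the renamed post-processing task after this change. The /post_processing path, the form-style parameters, and the "enable_communities" task name come from the diff; the base URL and credentials are placeholders, and sending tasks as a JSON-encoded list is an assumption (the full endpoint signature is truncated above).

import json
import requests

# Placeholder connection details; only the endpoint path and the renamed
# "enable_communities" task (formerly "create_communities") are taken from the diff.
payload = {
    "uri": "neo4j://localhost:7687",
    "userName": "neo4j",
    "password": "password",
    "database": "neo4j",
    "tasks": json.dumps(["enable_communities"]),  # assumed JSON-encoded list of task names
}
response = requests.post("http://localhost:8000/post_processing", data=payload)
print(response.status_code)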
@@ -321,10 +321,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(),
         gc.collect()

 @app.post("/chunk_entities")
-async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()):
+async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()):
     try:
-        logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}")
-        result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower()))
+        result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode)
         json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
         logger.log_struct(json_obj, "INFO")
         return create_api_response('Success',data=result)
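
For reference, a minimal client-side sketch of the reworked /chunk_entities contract. Only the form-field names (nodedetails, entities, mode) and their JSON shapes are taken from the updated signature; the base URL, credentials, ids, and the literal mode string are hypothetical.

import json
import requests

# nodedetails carries the chunk/entity/community details returned by a prior chat
# response, and entities carries entity/relationship element ids; both are JSON strings.
payload = {
    "uri": "neo4j://localhost:7687",  # placeholder connection details
    "userName": "neo4j",
    "password": "password",
    "database": "neo4j",
    "nodedetails": json.dumps({"chunkdetails": [{"id": "chunk-1"}]}),
    "entities": json.dumps({"entityids": [], "relationshipids": []}),
    "mode": "graph+vector+fulltext",  # illustrative; non entity/community modes take the chunk path
}
response = requests.post("http://localhost:8000/chunk_entities", data=payload)
print(response.json())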

backend/src/QA_integration.py

+49 −37
@@ -147,7 +147,6 @@ def get_sources_and_chunks(sources_used, docs):
     result = {
         'sources': sources_used,
         'chunkdetails': chunkdetails_list,
-        "entities" : list()
     }
     return result

@@ -182,16 +181,19 @@ def format_documents(documents, model):
     sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True)
     sorted_documents = sorted_documents[:prompt_token_cutoff]

-    formatted_docs = []
+    formatted_docs = list()
     sources = set()
-    lc_entities = {'entities':list()}
+    entities = dict()
+    global_communities = list()
+

     for doc in sorted_documents:
         try:
             source = doc.metadata.get('source', "unknown")
             sources.add(source)

-            lc_entities = doc.metadata if 'entities'in doc.metadata.keys() else lc_entities
+            entities = doc.metadata['entities'] if 'entities'in doc.metadata.keys() else entities
+            global_communities = doc.metadata["communitydetails"] if 'communitydetails'in doc.metadata.keys() else global_communities

             formatted_doc = (
                 "Document start\n"
@@ -204,13 +206,13 @@ def format_documents(documents, model):
         except Exception as e:
             logging.error(f"Error formatting document: {e}")

-    return "\n\n".join(formatted_docs), sources,lc_entities
+    return "\n\n".join(formatted_docs), sources,entities,global_communities

 def process_documents(docs, question, messages, llm, model,chat_mode_settings):
     start_time = time.time()

     try:
-        formatted_docs, sources,lc_entities = format_documents(docs, model)
+        formatted_docs, sources, entitydetails, communities = format_documents(docs, model)

         rag_chain = get_rag_chain(llm=llm)

@@ -219,12 +221,25 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings):
             "context": formatted_docs,
             "input": question
         })
-        if chat_mode_settings["mode"] == "entity search+vector":
-            result = {'sources': list(),
-                      'chunkdetails': list()}
-            result.update(lc_entities)
+
+        result = {'sources': list(), 'nodedetails': dict(), 'entities': dict()}
+        node_details = {"chunkdetails":list(),"entitydetails":list(),"communitydetails":list()}
+        entities = {'entityids':list(),"relationshipids":list()}
+
+        if chat_mode_settings["mode"] == CHAT_ENTITY_VECTOR_MODE:
+            node_details["entitydetails"] = entitydetails
+
+        elif chat_mode_settings["mode"] == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE:
+            node_details["communitydetails"] = communities
         else:
-            result = get_sources_and_chunks(sources, docs)
+            sources_and_chunks = get_sources_and_chunks(sources, docs)
+            result['sources'] = sources_and_chunks['sources']
+            node_details["chunkdetails"] = sources_and_chunks["chunkdetails"]
+            entities.update(entitydetails)
+
+        result["nodedetails"] = node_details
+        result["entities"] = entities
+
         content = ai_response.content
         total_tokens = get_total_tokens(ai_response, llm)

@@ -295,10 +310,13 @@ def create_document_retriever_chain(llm, retriever):

 def initialize_neo4j_vector(graph, chat_mode_settings):
     try:
-        mode = chat_mode_settings.get('mode', 'undefined')
         retrieval_query = chat_mode_settings.get("retrieval_query")
         index_name = chat_mode_settings.get("index_name")
         keyword_index = chat_mode_settings.get("keyword_index", "")
+        node_label = chat_mode_settings.get("node_label")
+        embedding_node_property = chat_mode_settings.get("embedding_node_property")
+        text_node_properties = chat_mode_settings.get("text_node_properties")
+

         if not retrieval_query or not index_name:
             raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.")
@@ -310,28 +328,21 @@ def initialize_neo4j_vector(graph, chat_mode_settings):
                 retrieval_query=retrieval_query,
                 graph=graph,
                 search_type="hybrid",
-                node_label="Chunk",
-                embedding_node_property="embedding",
-                text_node_properties=["text"],
+                node_label=node_label,
+                embedding_node_property=embedding_node_property,
+                text_node_properties=text_node_properties,
                 keyword_index_name=keyword_index
             )
             logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'")
-        elif mode == "entity search+vector":
-            neo_db = Neo4jVector.from_existing_index(
-                embedding=EMBEDDING_FUNCTION,
-                index_name=index_name,
-                retrieval_query=retrieval_query,
-                graph=graph
-            )
         else:
             neo_db = Neo4jVector.from_existing_graph(
                 embedding=EMBEDDING_FUNCTION,
                 index_name=index_name,
                 retrieval_query=retrieval_query,
                 graph=graph,
-                node_label="Chunk",
-                embedding_node_property="embedding",
-                text_node_properties=["text"]
+                node_label=node_label,
+                embedding_node_property=embedding_node_property,
+                text_node_properties=text_node_properties
             )
             logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'")
     except Exception as e:
@@ -359,12 +370,12 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_
     logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}")
     return retriever

-def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
+def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
     try:
-
+
         neo_db = initialize_neo4j_vector(graph, chat_mode_settings)
         document_names= list(map(str.strip, json.loads(document_names)))
-        search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "entity search+vector" else CHAT_SEARCH_KWARG_K
+        search_k = chat_mode_settings["top_k"]
         retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold)
         return retriever
     except Exception as e:
@@ -397,12 +408,13 @@ def process_chat_response(messages, history, question, model, graph, document_na
     try:
         llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings)

-        docs = retrieve_documents(doc_retriever, messages)
+        docs = retrieve_documents(doc_retriever, messages)
+
         if docs:
             content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings)
         else:
             content = "I couldn't find any relevant documents to answer your question."
-            result = {"sources": [], "chunkdetails": [], "entities": []}
+            result = {"sources": list(), "nodedetails": list(), "entities": list()}
             total_tokens = 0

         ai_response = AIMessage(content=content)
@@ -412,18 +424,18 @@ def process_chat_response(messages, history, question, model, graph, document_na
         summarization_thread.start()
         logging.info("Summarization thread started.")
         # summarize_and_log(history, messages, llm)
-
+
         return {
             "session_id": "",
             "message": content,
             "info": {
                 "sources": result["sources"],
                 "model": model_version,
-                "chunkdetails": result["chunkdetails"],
+                "nodedetails": result["nodedetails"],
                 "total_tokens": total_tokens,
                 "response_time": 0,
                 "mode": chat_mode_settings["mode"],
-                "entities": result["entities"]
+                "entities": result["entities"],
             },
             "user": "chatbot"
         }
@@ -435,12 +447,12 @@ def process_chat_response(messages, history, question, model, graph, document_na
             "message": "Something went wrong",
             "info": {
                 "sources": [],
-                "chunkdetails": [],
+                "nodedetails": [],
                 "total_tokens": 0,
                 "response_time": 0,
                 "error": f"{type(e).__name__}: {str(e)}",
                 "mode": chat_mode_settings["mode"],
-                "entities": []
+                "entities": [],
             },
             "user": "chatbot"
         }
@@ -593,7 +605,7 @@ def create_neo4j_chat_message_history(graph, session_id, write_access=True):
         raise

 def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP):
-    default_settings = settings_map["default"]
+    default_settings = settings_map[CHAT_DEFAULT_MODE]
     try:
         chat_mode_settings = settings_map.get(mode, default_settings)
         chat_mode_settings["mode"] = mode
@@ -615,7 +627,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access
     user_question = HumanMessage(content=question)
     messages.append(user_question)

-    if mode == "graph":
+    if mode == CHAT_GRAPH_MODE:
         result = process_graph_response(model, graph, question, messages, history)
     else:
         chat_mode_settings = get_chat_mode_settings(mode=mode)
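
For orientation, a sketch of the per-mode settings that initialize_neo4j_vector and get_neo4j_retriever now read instead of hard-coded values. The key names (retrieval_query, index_name, keyword_index, node_label, embedding_node_property, text_node_properties, top_k) come from the lookups in the diff; the mode name and every value below are placeholders, not the project's actual CHAT_MODE_CONFIG_MAP.

# Hypothetical illustration of one CHAT_MODE_CONFIG_MAP entry; the previous defaults
# ("Chunk", "embedding", ["text"]) were hard-coded and are now supplied per mode.
CHAT_MODE_CONFIG_MAP = {
    "graph+vector+fulltext": {
        "retrieval_query": "RETURN node.text AS text, score, {source: node.fileName} AS metadata",  # placeholder Cypher
        "index_name": "vector",
        "keyword_index": "keyword",
        "node_label": "Chunk",
        "embedding_node_property": "embedding",
        "text_node_properties": ["text"],
        "top_k": 5,
    },
}

settings = CHAT_MODE_CONFIG_MAP["graph+vector+fulltext"]
node_label = settings.get("node_label")  # same .get() lookups as initialize_neo4j_vector
search_k = settings["top_k"]             # replaces the LOCAL_COMMUNITY_TOP_K / CHAT_SEARCH_KWARG_K branching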

backend/src/chunkid_entities.py

+60 −44
@@ -81,16 +81,16 @@ def process_chunk_data(chunk_data):
     except Exception as e:
         logging.error(f"chunkid_entities module: An error occurred while extracting the Chunk text from records: {e}")

-def process_chunkids(driver, chunk_ids):
+def process_chunkids(driver, chunk_ids, entities):
     """
     Processes chunk IDs to retrieve chunk data.
     """
     try:
         logging.info(f"Starting graph query process for chunk ids: {chunk_ids}")
-        chunk_ids_list = chunk_ids.split(",")
-
-        records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids,entityIds=entities["entityids"], relationshipIds=entities["relationshipids"])
         result = process_records(records)
+        result["nodes"].extend(records[0]["nodes"])
+        result["nodes"] = remove_duplicate_nodes(result["nodes"])
         logging.info(f"Nodes and relationships are processed")

         result["chunk_data"] = process_chunk_data(records)
@@ -118,79 +118,95 @@ def remove_duplicate_nodes(nodes,property="element_id"):

     return unique_nodes

-def process_entityids(driver, chunk_ids):
+def process_entityids(driver, entity_ids):
     """
     Processes entity IDs to retrieve local community data.
     """
     try:
-        logging.info(f"Starting graph query process for entity ids: {chunk_ids}")
-        entity_ids_list = chunk_ids.split(",")
+        logging.info(f"Starting graph query process for entity ids: {entity_ids}")
         query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format(
             topChunks=LOCAL_COMMUNITY_TOP_CHUNKS,
             topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES,
             topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS
         )
         query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX + query_body + LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX

-        records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list)
+        records, summary, keys = driver.execute_query(query, entityIds=entity_ids)

         result = process_records(records)
         if records:
             result["nodes"].extend(records[0]["nodes"])
             result["nodes"] = remove_duplicate_nodes(result["nodes"])
+
             logging.info(f"Nodes and relationships are processed")
+
             result["chunk_data"] = records[0]["chunks"]
             result["community_data"] = records[0]["communities"]
         else:
             result["chunk_data"] = list()
             result["community_data"] = list()
-        logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}")
+        logging.info(f"Query process completed successfully for chunk ids: {entity_ids}")
         return result
     except Exception as e:
-        logging.error(f"chunkid_entities module: Error processing entity ids: {chunk_ids}. Error: {e}")
+        logging.error(f"chunkid_entities module: Error processing entity ids: {entity_ids}. Error: {e}")
         raise

-def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False):
-    """
-    Retrieve and process nodes and relationships from a graph database given a list of chunk IDs.
+def process_communityids(driver, community_ids):
+    """Processes community IDs to retrieve community data."""
+    try:
+        logging.info(f"Starting graph query process for community ids: {community_ids}")
+        query = GLOBAL_COMMUNITY_DETAILS_QUERY
+        records, summary, keys = driver.execute_query(query, communityids=community_ids)
+
+        result = {"nodes": [], "relationships": [], "chunk_data": []}
+        result["community_data"] = records[0]["communities"] if records else []

-    Parameters:
-    uri (str): The URI of the graph database.
-    username (str): The username for the database authentication.
-    password (str): The password for the database authentication.
-    chunk_ids (str): A comma-separated string of chunk IDs.
+        logging.info(f"Query process completed successfully for community ids: {community_ids}")
+        return result
+    except Exception as e:
+        logging.error(f"chunkid_entities module: Error processing community ids: {community_ids}. Error: {e}")
+        raise

-    Returns:
-    dict: A dictionary with 'nodes' and 'relationships' keys containing processed data, or an error message.
-    """
+def get_entities_from_chunkids(uri, username, password, database ,nodedetails,entities,mode):
     try:

         driver = get_graphDB_driver(uri, username, password,database)
-        if not is_entity:
-            if chunk_ids:
-                logging.info(f"chunkid_entities module: Starting for chunk ids : {chunk_ids}")
-                result = process_chunkids(driver,chunk_ids)
+        default_response = {"nodes": list(),"relationships": list(),"chunk_data": list(),"community_data": list(),}
+
+        nodedetails = json.loads(nodedetails)
+        entities = json.loads(entities)
+
+        if mode == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE:
+
+            if "communitydetails" in nodedetails and nodedetails["communitydetails"]:
+                community_ids = [item["id"] for item in nodedetails["communitydetails"]]
+                logging.info(f"chunkid_entities module: Starting for community ids: {community_ids}")
+                return process_communityids(driver, community_ids)
+            else:
+                logging.info("chunkid_entities module: No community ids are passed")
+                return default_response
+
+        elif mode == CHAT_ENTITY_VECTOR_MODE:
+
+            if "entitydetails" in nodedetails and nodedetails["entitydetails"]:
+                entity_ids = [item["id"] for item in nodedetails["entitydetails"]]
+                logging.info(f"chunkid_entities module: Starting for entity ids: {entity_ids}")
+                return process_entityids(driver, entity_ids)
             else:
-                logging.info(f"chunkid_entities module: No chunk ids are passed")
-                result = {
-                    "nodes": [],
-                    "relationships": [],
-                    "chunk_data":[]
-                }
-                return result
-        if chunk_ids:
-            result = process_entityids(driver,chunk_ids)
-            logging.info(f"chunkid_entities module: Starting for entity ids : {chunk_ids}")
+                logging.info("chunkid_entities module: No entity ids are passed")
+                return default_response
+
         else:
-            logging.info(f"chunkid_entities module: No entity ids are passed")
-            result = {
-                "nodes": [],
-                "relationships": [],
-                "chunk_data":[],
-                "community_data":[]
-            }
-            return result
+
+            if "chunkdetails" in nodedetails and nodedetails["chunkdetails"]:
+                chunk_ids = [item["id"] for item in nodedetails["chunkdetails"]]
+                logging.info(f"chunkid_entities module: Starting for chunk ids: {chunk_ids}")
+                return process_chunkids(driver, chunk_ids, entities)
+            else:
+                logging.info("chunkid_entities module: No chunk ids are passed")
+                return default_response

     except Exception as e:
         logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}")
-        raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e
+        raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e
+
raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e
212+
