diff --git a/backend/score.py b/backend/score.py index 92a573aeb..a9e8e0b93 100644 --- a/backend/score.py +++ b/backend/score.py @@ -271,8 +271,8 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database await asyncio.to_thread(create_entity_embedding, graph) json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logging.info(f'Entity Embeddings created') - - if "create_communities" in tasks: + + if "enable_communities" in tasks: model = "openai-gpt-4o" await asyncio.to_thread(create_communities, uri, userName, password, database,model) josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} @@ -321,10 +321,9 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(), gc.collect() @app.post("/chunk_entities") -async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), chunk_ids=Form(None),is_entity=Form()): +async def chunk_entities(uri=Form(),userName=Form(), password=Form(), database=Form(), nodedetails=Form(None),entities=Form(),mode=Form()): try: - logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") - result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,chunk_ids=chunk_ids,is_entity=json.loads(is_entity.lower())) + result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, database=database,nodedetails=nodedetails,entities=entities,mode=mode) json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))} logger.log_struct(json_obj, "INFO") return create_api_response('Success',data=result) diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 868508815..169d2b8c3 100644 --- a/backend/src/QA_integration.py 
+++ b/backend/src/QA_integration.py @@ -147,7 +147,6 @@ def get_sources_and_chunks(sources_used, docs): result = { 'sources': sources_used, 'chunkdetails': chunkdetails_list, - "entities" : list() } return result @@ -182,16 +181,19 @@ def format_documents(documents, model): sorted_documents = sorted(documents, key=lambda doc: doc.state.get("query_similarity_score", 0), reverse=True) sorted_documents = sorted_documents[:prompt_token_cutoff] - formatted_docs = [] + formatted_docs = list() sources = set() - lc_entities = {'entities':list()} + entities = dict() + global_communities = list() + for doc in sorted_documents: try: source = doc.metadata.get('source', "unknown") sources.add(source) - lc_entities = doc.metadata if 'entities'in doc.metadata.keys() else lc_entities + entities = doc.metadata['entities'] if 'entities'in doc.metadata.keys() else entities + global_communities = doc.metadata["communitydetails"] if 'communitydetails'in doc.metadata.keys() else global_communities formatted_doc = ( "Document start\n" @@ -204,13 +206,13 @@ def format_documents(documents, model): except Exception as e: logging.error(f"Error formatting document: {e}") - return "\n\n".join(formatted_docs), sources,lc_entities + return "\n\n".join(formatted_docs), sources,entities,global_communities def process_documents(docs, question, messages, llm, model,chat_mode_settings): start_time = time.time() try: - formatted_docs, sources,lc_entities = format_documents(docs, model) + formatted_docs, sources, entitydetails, communities = format_documents(docs, model) rag_chain = get_rag_chain(llm=llm) @@ -219,12 +221,25 @@ def process_documents(docs, question, messages, llm, model,chat_mode_settings): "context": formatted_docs, "input": question }) - if chat_mode_settings["mode"] == "entity search+vector": - result = {'sources': list(), - 'chunkdetails': list()} - result.update(lc_entities) + + result = {'sources': list(), 'nodedetails': dict(), 'entities': dict()} + node_details = 
{"chunkdetails":list(),"entitydetails":list(),"communitydetails":list()} + entities = {'entityids':list(),"relationshipids":list()} + + if chat_mode_settings["mode"] == CHAT_ENTITY_VECTOR_MODE: + node_details["entitydetails"] = entitydetails + + elif chat_mode_settings["mode"] == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE: + node_details["communitydetails"] = communities else: - result = get_sources_and_chunks(sources, docs) + sources_and_chunks = get_sources_and_chunks(sources, docs) + result['sources'] = sources_and_chunks['sources'] + node_details["chunkdetails"] = sources_and_chunks["chunkdetails"] + entities.update(entitydetails) + + result["nodedetails"] = node_details + result["entities"] = entities + content = ai_response.content total_tokens = get_total_tokens(ai_response, llm) @@ -295,10 +310,13 @@ def create_document_retriever_chain(llm, retriever): def initialize_neo4j_vector(graph, chat_mode_settings): try: - mode = chat_mode_settings.get('mode', 'undefined') retrieval_query = chat_mode_settings.get("retrieval_query") index_name = chat_mode_settings.get("index_name") keyword_index = chat_mode_settings.get("keyword_index", "") + node_label = chat_mode_settings.get("node_label") + embedding_node_property = chat_mode_settings.get("embedding_node_property") + text_node_properties = chat_mode_settings.get("text_node_properties") + if not retrieval_query or not index_name: raise ValueError("Required settings 'retrieval_query' or 'index_name' are missing.") @@ -310,28 +328,21 @@ def initialize_neo4j_vector(graph, chat_mode_settings): retrieval_query=retrieval_query, graph=graph, search_type="hybrid", - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"], + node_label=node_label, + embedding_node_property=embedding_node_property, + text_node_properties=text_node_properties, keyword_index_name=keyword_index ) logging.info(f"Successfully retrieved Neo4jVector Fulltext index '{index_name}' and keyword index '{keyword_index}'") - elif 
mode == "entity search+vector": - neo_db = Neo4jVector.from_existing_index( - embedding=EMBEDDING_FUNCTION, - index_name=index_name, - retrieval_query=retrieval_query, - graph=graph - ) else: neo_db = Neo4jVector.from_existing_graph( embedding=EMBEDDING_FUNCTION, index_name=index_name, retrieval_query=retrieval_query, graph=graph, - node_label="Chunk", - embedding_node_property="embedding", - text_node_properties=["text"] + node_label=node_label, + embedding_node_property=embedding_node_property, + text_node_properties=text_node_properties ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") except Exception as e: @@ -359,12 +370,12 @@ def create_retriever(neo_db, document_names, chat_mode_settings,search_k, score_ logging.info(f"Successfully created retriever with search_k={search_k}, score_threshold={score_threshold}") return retriever -def get_neo4j_retriever(graph, document_names,chat_mode_settings, search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): +def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: - + neo_db = initialize_neo4j_vector(graph, chat_mode_settings) document_names= list(map(str.strip, json.loads(document_names))) - search_k = LOCAL_COMMUNITY_TOP_K if chat_mode_settings["mode"] == "entity search+vector" else CHAT_SEARCH_KWARG_K + search_k = chat_mode_settings["top_k"] retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever except Exception as e: @@ -397,12 +408,13 @@ def process_chat_response(messages, history, question, model, graph, document_na try: llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings) - docs = retrieve_documents(doc_retriever, messages) + docs = retrieve_documents(doc_retriever, messages) + if docs: content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings) 
else: content = "I couldn't find any relevant documents to answer your question." - result = {"sources": [], "chunkdetails": [], "entities": []} + result = {"sources": list(), "nodedetails": list(), "entities": list()} total_tokens = 0 ai_response = AIMessage(content=content) @@ -412,18 +424,18 @@ def process_chat_response(messages, history, question, model, graph, document_na summarization_thread.start() logging.info("Summarization thread started.") # summarize_and_log(history, messages, llm) - + return { "session_id": "", "message": content, "info": { "sources": result["sources"], "model": model_version, - "chunkdetails": result["chunkdetails"], + "nodedetails": result["nodedetails"], "total_tokens": total_tokens, "response_time": 0, "mode": chat_mode_settings["mode"], - "entities": result["entities"] + "entities": result["entities"], }, "user": "chatbot" } @@ -435,12 +447,12 @@ def process_chat_response(messages, history, question, model, graph, document_na "message": "Something went wrong", "info": { "sources": [], - "chunkdetails": [], + "nodedetails": [], "total_tokens": 0, "response_time": 0, "error": f"{type(e).__name__}: {str(e)}", "mode": chat_mode_settings["mode"], - "entities": [] + "entities": [], }, "user": "chatbot" } @@ -593,7 +605,7 @@ def create_neo4j_chat_message_history(graph, session_id, write_access=True): raise def get_chat_mode_settings(mode,settings_map=CHAT_MODE_CONFIG_MAP): - default_settings = settings_map["default"] + default_settings = settings_map[CHAT_DEFAULT_MODE] try: chat_mode_settings = settings_map.get(mode, default_settings) chat_mode_settings["mode"] = mode @@ -615,7 +627,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access user_question = HumanMessage(content=question) messages.append(user_question) - if mode == "graph": + if mode == CHAT_GRAPH_MODE: result = process_graph_response(model, graph, question, messages, history) else: chat_mode_settings = get_chat_mode_settings(mode=mode) diff 
--git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index aa8d1d4ca..8bb9c2198 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -81,16 +81,16 @@ def process_chunk_data(chunk_data): except Exception as e: logging.error(f"chunkid_entities module: An error occurred while extracting the Chunk text from records: {e}") -def process_chunkids(driver, chunk_ids): +def process_chunkids(driver, chunk_ids, entities): """ Processes chunk IDs to retrieve chunk data. """ try: logging.info(f"Starting graph query process for chunk ids: {chunk_ids}") - chunk_ids_list = chunk_ids.split(",") - - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids,entityIds=entities["entityids"], relationshipIds=entities["relationshipids"]) result = process_records(records) + result["nodes"].extend(records[0]["nodes"]) + result["nodes"] = remove_duplicate_nodes(result["nodes"]) logging.info(f"Nodes and relationships are processed") result["chunk_data"] = process_chunk_data(records) @@ -118,13 +118,12 @@ def remove_duplicate_nodes(nodes,property="element_id"): return unique_nodes -def process_entityids(driver, chunk_ids): +def process_entityids(driver, entity_ids): """ Processes entity IDs to retrieve local community data. 
""" try: - logging.info(f"Starting graph query process for entity ids: {chunk_ids}") - entity_ids_list = chunk_ids.split(",") + logging.info(f"Starting graph query process for entity ids: {entity_ids}") query_body = LOCAL_COMMUNITY_SEARCH_QUERY.format( topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, @@ -132,65 +131,82 @@ def process_entityids(driver, chunk_ids): ) query = LOCAL_COMMUNITY_DETAILS_QUERY_PREFIX + query_body + LOCAL_COMMUNITY_DETAILS_QUERY_SUFFIX - records, summary, keys = driver.execute_query(query, entityIds=entity_ids_list) + records, summary, keys = driver.execute_query(query, entityIds=entity_ids) result = process_records(records) if records: result["nodes"].extend(records[0]["nodes"]) result["nodes"] = remove_duplicate_nodes(result["nodes"]) + logging.info(f"Nodes and relationships are processed") + result["chunk_data"] = records[0]["chunks"] result["community_data"] = records[0]["communities"] else: result["chunk_data"] = list() result["community_data"] = list() - logging.info(f"Query process completed successfully for chunk ids: {chunk_ids}") + logging.info(f"Query process completed successfully for chunk ids: {entity_ids}") return result except Exception as e: - logging.error(f"chunkid_entities module: Error processing entity ids: {chunk_ids}. Error: {e}") + logging.error(f"chunkid_entities module: Error processing entity ids: {entity_ids}. Error: {e}") raise -def get_entities_from_chunkids(uri, username, password, database ,chunk_ids,is_entity=False): - """ - Retrieve and process nodes and relationships from a graph database given a list of chunk IDs. 
+def process_communityids(driver, community_ids): + """Processes community IDs to retrieve community data.""" + try: + logging.info(f"Starting graph query process for community ids: {community_ids}") + query = GLOBAL_COMMUNITY_DETAILS_QUERY + records, summary, keys = driver.execute_query(query, communityids=community_ids) + + result = {"nodes": [], "relationships": [], "chunk_data": []} + result["community_data"] = records[0]["communities"] if records else [] - Parameters: - uri (str): The URI of the graph database. - username (str): The username for the database authentication. - password (str): The password for the database authentication. - chunk_ids (str): A comma-separated string of chunk IDs. + logging.info(f"Query process completed successfully for community ids: {community_ids}") + return result + except Exception as e: + logging.error(f"chunkid_entities module: Error processing community ids: {community_ids}. Error: {e}") + raise - Returns: - dict: A dictionary with 'nodes' and 'relationships' keys containing processed data, or an error message. 
- """ +def get_entities_from_chunkids(uri, username, password, database ,nodedetails,entities,mode): try: driver = get_graphDB_driver(uri, username, password,database) - if not is_entity: - if chunk_ids: - logging.info(f"chunkid_entities module: Starting for chunk ids : {chunk_ids}") - result = process_chunkids(driver,chunk_ids) + default_response = {"nodes": list(),"relationships": list(),"chunk_data": list(),"community_data": list(),} + + nodedetails = json.loads(nodedetails) + entities = json.loads(entities) + + if mode == CHAT_GLOBAL_VECTOR_FULLTEXT_MODE: + + if "communitydetails" in nodedetails and nodedetails["communitydetails"]: + community_ids = [item["id"] for item in nodedetails["communitydetails"]] + logging.info(f"chunkid_entities module: Starting for community ids: {community_ids}") + return process_communityids(driver, community_ids) + else: + logging.info("chunkid_entities module: No community ids are passed") + return default_response + + elif mode == CHAT_ENTITY_VECTOR_MODE: + + if "entitydetails" in nodedetails and nodedetails["entitydetails"]: + entity_ids = [item["id"] for item in nodedetails["entitydetails"]] + logging.info(f"chunkid_entities module: Starting for entity ids: {entity_ids}") + return process_entityids(driver, entity_ids) else: - logging.info(f"chunkid_entities module: No chunk ids are passed") - result = { - "nodes": [], - "relationships": [], - "chunk_data":[] - } - return result - if chunk_ids: - result = process_entityids(driver,chunk_ids) - logging.info(f"chunkid_entities module: Starting for entity ids : {chunk_ids}") + logging.info("chunkid_entities module: No entity ids are passed") + return default_response + else: - logging.info(f"chunkid_entities module: No entity ids are passed") - result = { - "nodes": [], - "relationships": [], - "chunk_data":[], - "community_data":[] - } - return result + + if "chunkdetails" in nodedetails and nodedetails["chunkdetails"]: + chunk_ids = [item["id"] for item in 
nodedetails["chunkdetails"]] + logging.info(f"chunkid_entities module: Starting for chunk ids: {chunk_ids}") + return process_chunkids(driver, chunk_ids, entities) + else: + logging.info("chunkid_entities module: No chunk ids are passed") + return default_response except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") - raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e \ No newline at end of file + raise Exception(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Please check the logs for more details.") from e + diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index f76812553..834459fd1 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -22,82 +22,150 @@ GRAPH_QUERY = """ MATCH docs = (d:Document) WHERE d.fileName IN $document_names -WITH docs, d ORDER BY d.createdAt DESC -// fetch chunks for documents, currently with limit +WITH docs, d +ORDER BY d.createdAt DESC + +// Fetch chunks for documents, currently with limit CALL {{ WITH d - OPTIONAL MATCH chunks=(d)<-[:PART_OF|FIRST_CHUNK]-(c:Chunk) + OPTIONAL MATCH chunks = (d)<-[:PART_OF|FIRST_CHUNK]-(c:Chunk) RETURN c, chunks LIMIT {graph_chunk_limit} }} -WITH collect(distinct docs) as docs, collect(distinct chunks) as chunks, collect(distinct c) as selectedChunks -WITH docs, chunks, selectedChunks -// select relationships between selected chunks +WITH collect(distinct docs) AS docs, + collect(distinct chunks) AS chunks, + collect(distinct c) AS selectedChunks + +// Select relationships between selected chunks WITH *, -[ c in selectedChunks | [p=(c)-[:NEXT_CHUNK|SIMILAR]-(other) WHERE other IN selectedChunks | p]] as chunkRels + [c IN selectedChunks | + [p = (c)-[:NEXT_CHUNK|SIMILAR]-(other) + WHERE other IN selectedChunks | p]] AS chunkRels -// fetch entities and 
relationships between entities +// Fetch entities and relationships between entities CALL {{ WITH selectedChunks - UNWIND selectedChunks as c - - OPTIONAL MATCH entities=(c:Chunk)-[:HAS_ENTITY]->(e) - OPTIONAL MATCH entityRels=(e)--(e2:!Chunk) WHERE exists {{ + UNWIND selectedChunks AS c + OPTIONAL MATCH entities = (c:Chunk)-[:HAS_ENTITY]->(e) + OPTIONAL MATCH entityRels = (e)--(e2:!Chunk) + WHERE exists {{ (e2)<-[:HAS_ENTITY]-(other) WHERE other IN selectedChunks }} - RETURN entities , entityRels, collect(DISTINCT e) as entity + RETURN entities, entityRels, collect(DISTINCT e) AS entity }} -WITH docs,chunks,chunkRels, collect(entities) as entities, collect(entityRels) as entityRels, entity + +WITH docs, chunks, chunkRels, + collect(entities) AS entities, + collect(entityRels) AS entityRels, + entity WITH * CALL {{ - with entity - unwind entity as n - OPTIONAL MATCH community=(n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) - OPTIONAL MATCH parentcommunity=(p)-[:PARENT_COMMUNITY*]->(p2:__Community__) - return collect(community) as communities , collect(parentcommunity) as parentCommunities + WITH entity + UNWIND entity AS n + OPTIONAL MATCH community = (n:__Entity__)-[:IN_COMMUNITY]->(p:__Community__) + OPTIONAL MATCH parentcommunity = (p)-[:PARENT_COMMUNITY*]->(p2:__Community__) + RETURN collect(community) AS communities, + collect(parentcommunity) AS parentCommunities +}} + +WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) AS paths + +// Distinct nodes and relationships +CALL {{ + WITH paths + UNWIND paths AS path + UNWIND nodes(path) AS node + WITH distinct node + RETURN collect(node /* {{.*, labels:labels(node), elementId:elementId(node), embedding:null, text:null}} */) AS nodes }} -WITH apoc.coll.flatten(docs + chunks + chunkRels + entities + entityRels + communities + parentCommunities, true) as paths +CALL {{ + WITH paths + UNWIND paths AS path + UNWIND relationships(path) AS rel + RETURN 
collect(distinct rel) AS rels +}} -// distinct nodes and rels -CALL {{ WITH paths UNWIND paths AS path UNWIND nodes(path) as node WITH distinct node - RETURN collect(node /* {{.*, labels:labels(node), elementId:elementId(node), embedding:null, text:null}} */) AS nodes }} -CALL {{ WITH paths UNWIND paths AS path UNWIND relationships(path) as rel RETURN collect(distinct rel) AS rels }} RETURN nodes, rels """ CHUNK_QUERY = """ -match (chunk:Chunk) where chunk.id IN $chunksIds - +MATCH (chunk:Chunk) +WHERE chunk.id IN $chunksIds MATCH (chunk)-[:PART_OF]->(d:Document) -CALL {WITH chunk -MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk &! Document &! `__Community__`) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -RETURN d as doc, [chunk in chunks | chunk {.*, embedding:null}] as chunks, - [r in rels | {startNode:{element_id:elementId(startNode(r)), labels:labels(startNode(r)), properties:{id:startNode(r).id,description:startNode(r).description}}, - endNode:{element_id:elementId(endNode(r)), labels:labels(endNode(r)), properties:{id:endNode(r).id,description:endNode(r).description}}, - relationship: {type:type(r), element_id:elementId(r)}}] as entities + +WITH d, + collect(distinct chunk) AS chunks + +// Collect relationships and nodes +WITH d, chunks, + collect { + MATCH ()-[r]->() + WHERE elementId(r) IN $relationshipIds + RETURN r + } AS rels, + collect { + MATCH (e) + WHERE elementId(e) IN $entityIds + RETURN e + } AS nodes + +WITH d, + chunks, + apoc.coll.toSet(apoc.coll.flatten(rels)) AS rels, + nodes + +RETURN + d AS doc, + [chunk IN chunks | + chunk {.*, embedding: null} + ] AS chunks, + [ + node IN nodes | + { + element_id: elementId(node), + labels: labels(node), + properties: { + id: node.id, + description: node.description + } + } + ] AS nodes, + [ + r IN rels | + { + startNode: { + element_id: 
elementId(startNode(r)), + labels: labels(startNode(r)), + properties: { + id: startNode(r).id, + description: startNode(r).description + } + }, + endNode: { + element_id: elementId(endNode(r)), + labels: labels(endNode(r)), + properties: { + id: endNode(r).id, + description: endNode(r).description + } + }, + relationship: { + type: type(r), + element_id: elementId(r) + } + } + ] AS entities """ ## CHAT SETUP CHAT_MAX_TOKENS = 1000 -CHAT_SEARCH_KWARG_K = 10 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 CHAT_DOC_SPLIT_SIZE = 3000 CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 -CHAT_TOKEN_CUT_OFF = { - ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro","gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, - ("openai-gpt-4","diffbot" ,'azure_ai_gpt_4o',"openai-gpt-4o", "openai-gpt-4o-mini") : 28, - ("ollama_llama3") : 2 -} - CHAT_TOKEN_CUT_OFF = { ("openai-gpt-3.5",'azure_ai_gpt_35',"gemini-1.0-pro","gemini-1.5-pro", "gemini-1.5-flash","groq-llama3",'groq_llama3_70b','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet', ) : 4, @@ -150,85 +218,146 @@ QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." 
-## CHAT QUERIES +## CHAT QUERIES +VECTOR_SEARCH_TOP_K = 10 + VECTOR_SEARCH_QUERY = """ WITH node AS chunk, score MATCH (chunk)-[:PART_OF]->(d:Document) -WITH d, collect(distinct {chunk: chunk, score: score}) as chunks, avg(score) as avg_score +WITH d, + collect(distinct {chunk: chunk, score: score}) AS chunks, + avg(score) AS avg_score + WITH d, avg_score, - [c in chunks | c.chunk.text] as texts, - [c in chunks | {id: c.chunk.id, score: c.score}] as chunkdetails -WITH d, avg_score, chunkdetails, - apoc.text.join(texts, "\n----\n") as text -RETURN text, avg_score AS score, - {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} as metadata + [c IN chunks | c.chunk.text] AS texts, + [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails + +WITH d, avg_score, chunkdetails, + apoc.text.join(texts, "\n----\n") AS text + +RETURN text, + avg_score AS score, + {source: COALESCE(CASE WHEN d.url CONTAINS "None" + THEN d.fileName + ELSE d.url + END, + d.fileName), + chunkdetails: chunkdetails} AS metadata """ +### Vector graph search VECTOR_GRAPH_SEARCH_ENTITY_LIMIT = 25 +VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH = 0.3 +VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH = 0.9 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE = 10 +VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE = 25 -VECTOR_GRAPH_SEARCH_QUERY = """ +VECTOR_GRAPH_SEARCH_QUERY_PREFIX = """ WITH node as chunk, score // find the document of the chunk MATCH (chunk)-[:PART_OF]->(d:Document) - // aggregate chunk-details -WITH d, collect(DISTINCT {{chunk: chunk, score: score}}) AS chunks, avg(score) as avg_score +WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score // fetch entities -CALL {{ WITH chunks +CALL { WITH chunks UNWIND chunks as chunkScore WITH chunkScore.chunk as chunk -// entities connected to the chunk -// todo only return entities that are actually in the chunk, remember we connect all extracted entities to 
all chunks -// todo sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes? -OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e) -WITH e, count(*) as numChunks -ORDER BY numChunks DESC LIMIT {no_of_entites} -// depending on match to query embedding either 1 or 2 step expansion -WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95 -THEN -collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document) RETURN path }} -ELSE -collect {{ OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document) RETURN path }} -END as paths, e -WITH apoc.coll.toSet(apoc.coll.flatten(collect(distinct paths))) as paths, collect(distinct e) as entities -// de-duplicate nodes and relationships across chunks -RETURN collect{{ unwind paths as p unwind relationships(p) as r return distinct r}} as rels, -collect{{ unwind paths as p unwind nodes(p) as n return distinct n}} as nodes, entities -}} +""" -// generate metadata and text components for chunks, nodes and relationships +VECTOR_GRAPH_SEARCH_ENTITY_QUERY = """ + OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e) + WITH e, count(*) AS numChunks + ORDER BY numChunks DESC + LIMIT {no_of_entites} + + WITH + CASE + WHEN e.embedding IS NULL OR ({embedding_match_min} <= vector.similarity.cosine($embedding, e.embedding) AND vector.similarity.cosine($embedding, e.embedding) <= {embedding_match_max}) THEN + collect {{ + OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,1}}(:!Chunk&!Document&!__Community__) + RETURN path LIMIT {entity_limit_minmax_case} + }} + WHEN e.embedding IS NOT NULL AND vector.similarity.cosine($embedding, e.embedding) > {embedding_match_max} THEN + collect {{ + OPTIONAL MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){{0,2}}(:!Chunk&!Document&!__Community__) + RETURN path LIMIT {entity_limit_max_case} + }} + ELSE + collect {{ + MATCH path=(e) + RETURN path + }} + END AS paths, e +""" + +VECTOR_GRAPH_SEARCH_QUERY_SUFFIX = """ + 
WITH apoc.coll.toSet(apoc.coll.flatten(collect(DISTINCT paths))) AS paths, + collect(DISTINCT e) AS entities + + // De-duplicate nodes and relationships across chunks + RETURN + collect { + UNWIND paths AS p + UNWIND relationships(p) AS r + RETURN DISTINCT r + } AS rels, + collect { + UNWIND paths AS p + UNWIND nodes(p) AS n + RETURN DISTINCT n + } AS nodes, + entities +} + +// Generate metadata and text components for chunks, nodes, and relationships WITH d, avg_score, [c IN chunks | c.chunk.text] AS texts, - [c IN chunks | {{id: c.chunk.id, score: c.score}}] AS chunkdetails, - apoc.coll.sort([n in nodes | - -coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+ -n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts, - apoc.coll.sort([r in rels - // optional filter if we limit the node-set - // WHERE startNode(r) in nodes AND endNode(r) in nodes - | -coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ -startNode(r).id + -" " + type(r) + " " + -coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id -]) as relTexts -, entities -// combine texts into response-text - -WITH d, avg_score,chunkdetails, -"Text Content:\\n" + -apoc.text.join(texts,"\\n----\\n") + -"\\n----\\nEntities:\\n"+ -apoc.text.join(nodeTexts,"\\n") + -"\\n----\\nRelationships:\\n" + -apoc.text.join(relTexts,"\\n") - -as text,entities - -RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata + [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, + [n IN nodes | elementId(n)] AS entityIds, + [r IN rels | elementId(r)] AS relIds, + apoc.coll.sort([ + n IN nodes | + coalesce(apoc.coll.removeAll(labels(n), ['__Entity__'])[0], "") + ":" + + n.id + + (CASE WHEN n.description IS NOT NULL THEN " (" + n.description + ")" ELSE "" END) + ]) AS 
nodeTexts, + apoc.coll.sort([ + r IN rels | + coalesce(apoc.coll.removeAll(labels(startNode(r)), ['__Entity__'])[0], "") + ":" + + startNode(r).id + " " + type(r) + " " + + coalesce(apoc.coll.removeAll(labels(endNode(r)), ['__Entity__'])[0], "") + ":" + endNode(r).id + ]) AS relTexts, + entities + +// Combine texts into response text +WITH d, avg_score, chunkdetails, entityIds, relIds, + "Text Content:\n" + apoc.text.join(texts, "\n----\n") + + "\n----\nEntities:\n" + apoc.text.join(nodeTexts, "\n") + + "\n----\nRelationships:\n" + apoc.text.join(relTexts, "\n") AS text, + entities + +RETURN + text, + avg_score AS score, + { + length: size(text), + source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), + chunkdetails: chunkdetails, + entities : { + entityids: entityIds, + relationshipids: relIds + } + } AS metadata """ +VECTOR_GRAPH_SEARCH_QUERY = VECTOR_GRAPH_SEARCH_QUERY_PREFIX+ VECTOR_GRAPH_SEARCH_ENTITY_QUERY.format( + no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT, + embedding_match_min=VECTOR_GRAPH_SEARCH_EMBEDDING_MIN_MATCH, + embedding_match_max=VECTOR_GRAPH_SEARCH_EMBEDDING_MAX_MATCH, + entity_limit_minmax_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MINMAX_CASE, + entity_limit_max_case=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT_MAX_CASE +) + VECTOR_GRAPH_SEARCH_QUERY_SUFFIX + ### Local community search LOCAL_COMMUNITY_TOP_K = 10 LOCAL_COMMUNITY_TOP_CHUNKS = 3 @@ -392,45 +521,112 @@ ] AS entities """ +LOCAL_COMMUNITY_SEARCH_QUERY_FORMATTED = LOCAL_COMMUNITY_SEARCH_QUERY.format( + topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, + topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, + topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX + +GLOBAL_SEARCH_TOP_K = 10 + +GLOBAL_VECTOR_SEARCH_QUERY = """ +WITH collect(distinct {community: node, score: score}) AS communities, + avg(score) AS avg_score + +WITH avg_score, + [c IN communities | c.community.summary] AS texts, + [c IN communities | {id: elementId(c.community), 
score: c.score}] AS communityDetails + +WITH avg_score, communityDetails, + apoc.text.join(texts, "\n----\n") AS text + +RETURN text, + avg_score AS score, + {communitydetails: communityDetails} AS metadata +""" + + + +GLOBAL_COMMUNITY_DETAILS_QUERY = """ +MATCH (community:__Community__) +WHERE elementId(community) IN $communityids +WITH collect(distinct community) AS communities +RETURN [community IN communities | + community {.*, embedding: null, elementid: elementId(community)}] AS communities +""" + +## CHAT MODES + +CHAT_VECTOR_MODE = "vector" +CHAT_FULLTEXT_MODE = "fulltext" +CHAT_ENTITY_VECTOR_MODE = "entity search+vector" +CHAT_VECTOR_GRAPH_MODE = "graph+vector" +CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph+vector+fulltext" +CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global search+vector+fulltext" +CHAT_GRAPH_MODE = "graph" +CHAT_DEFAULT_MODE = "graph+vector+fulltext" + CHAT_MODE_CONFIG_MAP= { - "vector": { + CHAT_VECTOR_MODE : { "retrieval_query": VECTOR_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": None, - "document_filter": True + "document_filter": True, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], + }, - "fulltext": { + CHAT_FULLTEXT_MODE : { "retrieval_query": VECTOR_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": "keyword", - "document_filter": False + "document_filter": False, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], }, - "entity search+vector": { - "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY.format(topChunks=LOCAL_COMMUNITY_TOP_CHUNKS, - topCommunities=LOCAL_COMMUNITY_TOP_COMMUNITIES, - topOutsideRels=LOCAL_COMMUNITY_TOP_OUTSIDE_RELS)+LOCAL_COMMUNITY_SEARCH_QUERY_SUFFIX, + CHAT_ENTITY_VECTOR_MODE : { + "retrieval_query": LOCAL_COMMUNITY_SEARCH_QUERY_FORMATTED, + "top_k": LOCAL_COMMUNITY_TOP_K, "index_name": "entity_vector", "keyword_index": None, - 
"document_filter": False + "document_filter": False, + "node_label": "__Entity__", + "embedding_node_property":"embedding", + "text_node_properties":["id"], }, - "graph+vector": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + CHAT_VECTOR_GRAPH_MODE : { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": None, - "document_filter": True + "document_filter": True, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], }, - "graph+vector+fulltext": { - "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT), + CHAT_VECTOR_GRAPH_FULLTEXT_MODE : { + "retrieval_query": VECTOR_GRAPH_SEARCH_QUERY, + "top_k": VECTOR_SEARCH_TOP_K, "index_name": "vector", "keyword_index": "keyword", - "document_filter": False + "document_filter": False, + "node_label": "Chunk", + "embedding_node_property":"embedding", + "text_node_properties":["text"], + }, + CHAT_GLOBAL_VECTOR_FULLTEXT_MODE : { + "retrieval_query": GLOBAL_VECTOR_SEARCH_QUERY, + "top_k": GLOBAL_SEARCH_TOP_K, + "index_name": "community_vector", + "keyword_index": "community_keyword", + "document_filter": False, + "node_label": "__Community__", + "embedding_node_property":"embedding", + "text_node_properties":["summary"], }, - "default": { - "retrieval_query": VECTOR_SEARCH_QUERY, - "index_name": "vector", - "keyword_index": None, - "document_filter": True - } } YOUTUBE_CHUNK_SIZE_SECONDS = 60 diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx index 74823520a..568868000 100644 --- a/frontend/src/components/ChatBot/ChatInfoModal.tsx +++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx @@ -38,15 +38,24 @@ const ChatInfoModal: React.FC = ({ model, total_tokens, response_time, - chunk_ids, + nodeDetails, mode, cypher_query, graphonly_entities, error, + 
entities_ids }) => { const { breakpoints } = tokens; const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`); - const [activeTab, setActiveTab] = useState(error?.length ? 10 : mode === chatModeLables.graph ? 4 : 3); + const [activeTab, setActiveTab] = useState( + error?.length + ? 10 + : mode === chatModeLables.global_vector + ? 7 + : mode === chatModeLables.graph + ? 4 + : 3 + ); const [infoEntities, setInfoEntities] = useState([]); const [communities, setCommunities] = useState([]); const [loading, setLoading] = useState(false); @@ -86,11 +95,8 @@ const ChatInfoModal: React.FC = ({ (async () => { setLoading(true); try { - const response = await chunkEntitiesAPI( - userCredentials as UserCredentials, - chunk_ids.map((c) => c.id).join(','), - userCredentials?.database, - mode === chatModeLables.entity_vector + const response = await chunkEntitiesAPI(userCredentials as UserCredentials, userCredentials?.database, nodeDetails, entities_ids, + mode, ); if (response.data.status === 'Failure') { throw new Error(response.data.error); @@ -123,17 +129,27 @@ const ChatInfoModal: React.FC = ({ }) ); setRelationships(relationshipsData ?? []); - setCommunities(communitiesData ?? []); + setCommunities(communitiesData.map((community: any) => { + const communityScore = nodeDetails?.communitydetails?.find((c: any) => + c.id === community.element_id); + return { + ...community, + score: communityScore?.score || 1 + }; + }) + .sort((a: any, b: any) => b.score - a.score) + ); + setChunks( chunksData .map((chunk: any) => { - const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); - return ( - { - ...chunk, - score: chunkScore?.score, - } ?? 
[] - ); + const chunkScore = nodeDetails?.chunkdetails?.find((c: any) => + c.id + === chunk.id); + return { + ...chunk, + score: chunkScore?.score + }; }) .sort((a: any, b: any) => b.score - a.score) ); @@ -147,7 +163,7 @@ const ChatInfoModal: React.FC = ({ () => { setcopiedText(false); }; - }, [chunk_ids, mode, error]); + }, [nodeDetails, mode, error]); const onChangeTabs = (tabId: number) => { setActiveTab(tabId); @@ -175,22 +191,33 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode != chatModeLables.graph ? Sources used : <>} - {mode != chatModeLables.graph ? Chunks : <>} - {mode === chatModeLables.graph_vector || - mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( - Top Entities used - ) : ( - <> - )} - {mode === chatModeLables.graph && cypher_query?.trim()?.length ? ( - Generated Cypher Query + {mode === chatModeLables.global_vector ? ( + // Only show the Communities tab if mode is global + Communities ) : ( - <> + <> + {mode != chatModeLables.graph ? Sources used : <>} + {mode != chatModeLables.graph ? Chunks : <>} + {mode === chatModeLables.graph_vector || + mode === chatModeLables.graph || + mode === chatModeLables.graph_vector_fulltext || + mode === chatModeLables.entity_vector ? ( + Top Entities used + ) : ( + <> + )} + {mode === chatModeLables.graph && cypher_query?.trim()?.length ? ( + Generated Cypher Query + ) : ( + <> + )} + {mode === chatModeLables.entity_vector ? ( + Communities + ) : ( + <> + )} + )} - {mode === chatModeLables.entity_vector ? Communities : <>} )} @@ -217,7 +244,7 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> - {mode === chatModeLables.entity_vector ? ( + {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? 
( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index 187c37178..b3d493236 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -8,7 +8,7 @@ import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; export default function ChatModeToggle({ menuAnchor, - closeHandler = () => {}, + closeHandler = () => { }, open, anchorPortal = true, disableBackdrop = false, @@ -20,30 +20,35 @@ export default function ChatModeToggle({ disableBackdrop?: boolean; }) { const { setchatMode, chatMode, postProcessingTasks, selectedRows } = useFileContext(); - const isCommunityAllowed = postProcessingTasks.includes('create_communities'); + const isCommunityAllowed = postProcessingTasks.includes('enable_communities'); const { isGdsActive } = useCredentials(); useEffect(() => { - if (selectedRows.length !== 0) { - setchatMode(chatModeLables.graph_vector); - } else { - setchatMode(chatModeLables.graph_vector_fulltext); + // If rows are selected, the mode is valid (either vector or graph+vector) + if (selectedRows.length > 0) { + if (!(chatMode === chatModeLables.vector || chatMode === chatModeLables.graph_vector)) { + setchatMode(chatModeLables.graph_vector); + } } - }, [selectedRows]); - + }, [selectedRows.length, chatMode, setchatMode]); const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? 
chatModes - : chatModes?.filter((m) => !m.mode.includes(chatModeLables.entity_vector)); + : chatModes?.filter( + (m) => + !m.mode.includes(chatModeLables.entity_vector) && + !m.mode.includes(chatModeLables.global_vector) + ); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean( - selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + selectedRows.length && + !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) ); const handleModeChange = () => { if (isDisabled) { - setchatMode(chatModeLables.graph_vector); + setchatMode(chatModeLables.graph_vector); } else { setchatMode(m.mode); } @@ -52,11 +57,11 @@ export default function ChatModeToggle({ return { title: (
- + {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)}
- {m.description} + {m.description}
), @@ -66,12 +71,12 @@ export default function ChatModeToggle({ {chatMode === m.mode && ( <> - {chatModeLables.selected} + {chatModeLables.selected} )} {isDisabled && ( <> - {chatModeLables.unavailableChatMode} + {chatModeLables.unavailableChatMode} )} @@ -82,10 +87,9 @@ export default function ChatModeToggle({ useEffect(() => { if (!selectedRows.length && !chatMode) { - setchatMode(chatMode); + setchatMode(chatModeLables.graph_vector_fulltext); } - }, [setchatMode, selectedRows, chatMode]); - + }, [setchatMode, selectedRows.length, chatMode]); return ( = (props) => { const [sourcesModal, setSourcesModal] = useState([]); const [modelModal, setModelModal] = useState(''); const [responseTime, setResponseTime] = useState(0); - const [chunkModal, setChunkModal] = useState([]); const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); const [copyMessageId, setCopyMessageId] = useState(null); const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); + const [entitiesModal, setEntitiesModal] = useState([]); + const [nodeDetailsModal, setNodeDetailsModal] = useState({}); const [value, copy] = useCopyToClipboard(); const { speak, cancel } = useSpeechSynthesis({ @@ -56,16 +57,10 @@ const Chatbot: FC = (props) => { setListMessages((msgs) => msgs.map((msg) => ({ ...msg, speaking: false }))); }, }); - let selectedFileNames: CustomFile[] = []; - for (let index = 0; index < selectedRows.length; index++) { - const id = selectedRows[index]; - for (let index = 0; index < filesData.length; index++) { - const f = filesData[index]; - if (f.id === id) { - selectedFileNames.push(f); - } - } - } + + let selectedFileNames: CustomFile[] = filesData.filter(f => + selectedRows.includes(f.id) && ['Completed'].includes(f.status) + ); const handleInputChange = (e: React.ChangeEvent) => { 
setInputMessage(e.target.value); @@ -82,7 +77,6 @@ const Chatbot: FC = (props) => { reply: string; sources?: string[]; model?: string; - chunk_ids?: chunk[]; total_tokens?: number; response_time?: number; speaking?: boolean; @@ -91,7 +85,8 @@ const Chatbot: FC = (props) => { cypher_query?: string; graphonly_entities?: []; error?: string; - entitiysearchonly_entities?: chunk[]; + entitiysearchonly_entities?: string[]; + nodeDetails?: nodeDetailsProps; }, index = 0 ) => { @@ -113,7 +108,6 @@ const Chatbot: FC = (props) => { isLoading: true, sources: response?.sources, model: response?.model, - chunks: response?.chunk_ids, total_tokens: response.total_tokens, response_time: response?.response_time, speaking: false, @@ -123,6 +117,7 @@ const Chatbot: FC = (props) => { graphonly_entities: response?.graphonly_entities, error: response.error, entitiysearchonly_entities: response.entitiysearchonly_entities, + nodeDetails: response?.nodeDetails }, ]); } else { @@ -136,7 +131,6 @@ const Chatbot: FC = (props) => { lastmsg.isLoading = false; lastmsg.sources = response?.sources; lastmsg.model = response?.model; - lastmsg.chunk_ids = response?.chunk_ids; lastmsg.total_tokens = response?.total_tokens; lastmsg.response_time = response?.response_time; lastmsg.speaking = false; @@ -146,6 +140,7 @@ const Chatbot: FC = (props) => { lastmsg.graphonly_entities = response.graphonly_entities; lastmsg.error = response.error; lastmsg.entities = response.entitiysearchonly_entities; + lastmsg.nodeDetails = response?.nodeDetails; return msgs.map((msg, index) => { if (index === msgs.length - 1) { return lastmsg; @@ -172,7 +167,7 @@ const Chatbot: FC = (props) => { let chatbotReply; let chatSources; let chatModel; - let chatChunks; + let chatnodedetails; let chatTimeTaken; let chatTokensUsed; let chatingMode; @@ -180,6 +175,7 @@ const Chatbot: FC = (props) => { let graphonly_entities; let error; let entitiysearchonly_entities; + let chatEntities; const datetime = `${date.toLocaleDateString()} 
${date.toLocaleTimeString()}`; const userMessage = { id: Date.now(), user: 'user', message: inputMessage, datetime: datetime, mode: chatMode }; setListMessages([...listMessages, userMessage]); @@ -198,7 +194,7 @@ const Chatbot: FC = (props) => { chatbotReply = chatresponse?.data?.data?.message; chatSources = chatresponse?.data?.data?.info.sources; chatModel = chatresponse?.data?.data?.info.model; - chatChunks = chatresponse?.data?.data?.info.chunkdetails; + chatnodedetails = chatresponse?.data?.data?.info.nodedetails; chatTokensUsed = chatresponse?.data?.data?.info.total_tokens; chatTimeTaken = chatresponse?.data?.data?.info.response_time; chatingMode = chatresponse?.data?.data?.info?.mode; @@ -206,11 +202,11 @@ const Chatbot: FC = (props) => { graphonly_entities = chatresponse?.data.data.info.context ?? []; entitiysearchonly_entities = chatresponse?.data.data.info.entities; error = chatresponse.data.data.info.error ?? ''; + chatEntities = chatresponse.data.data.info.entities; const finalbotReply = { reply: chatbotReply, sources: chatSources, model: chatModel, - chunk_ids: chatChunks, total_tokens: chatTokensUsed, response_time: chatTimeTaken, speaking: false, @@ -220,6 +216,8 @@ const Chatbot: FC = (props) => { graphonly_entities, error, entitiysearchonly_entities, + chatEntities, + nodeDetails: chatnodedetails }; simulateTypingEffect(finalbotReply); } catch (error) { @@ -324,9 +322,8 @@ const Chatbot: FC = (props) => { @@ -338,11 +335,10 @@ const Chatbot: FC = (props) => { } >
{chat.message}
@@ -366,15 +362,14 @@ const Chatbot: FC = (props) => { setModelModal(chat.model ?? ''); setSourcesModal(chat.sources ?? []); setResponseTime(chat.response_time ?? 0); - setChunkModal( - chat.mode === 'entity search+vector' ? chat.entities ?? [] : chat.chunk_ids ?? [] - ); setTokensUsed(chat.total_tokens ?? 0); setcypherQuery(chat.cypher_query ?? ''); setShowInfoModal(true); setChatsMode(chat.mode ?? ''); setgraphEntitites(chat.graphonly_entities ?? []); + setEntitiesModal(chat.entities ?? []); setmessageError(chat.error ?? ''); + setNodeDetailsModal(chat.nodeDetails ?? {}) }} > {' '} @@ -428,9 +423,8 @@ const Chatbot: FC = (props) => {
= (props) => { disabled={loading || !connectionStatus} size='medium' > - {buttonCaptions.ask} {selectedRows != undefined && selectedRows.length > 0 && `(${selectedRows.length})`} + {buttonCaptions.ask} {selectedFileNames != undefined && selectedFileNames.length > 0 && `(${selectedFileNames.length})`}
@@ -473,13 +467,14 @@ const Chatbot: FC = (props) => { diff --git a/frontend/src/components/ChatBot/Communities.tsx b/frontend/src/components/ChatBot/Communities.tsx index 9b530fd0f..11869d3d4 100644 --- a/frontend/src/components/ChatBot/Communities.tsx +++ b/frontend/src/components/ChatBot/Communities.tsx @@ -4,6 +4,7 @@ import ReactMarkdown from 'react-markdown'; import { CommunitiesProps } from '../../types'; const CommunitiesInfo: FC = ({ loading, communities }) => { + console.log('communities', communities); return ( <> {loading ? ( @@ -20,6 +21,10 @@ const CommunitiesInfo: FC = ({ loading, communities }) => { ID : {community.id}
+ + Score : + {community.score} + {community.summary} diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 0995426c1..3f7564461 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -33,6 +33,7 @@ import DatabaseStatusIcon from './UI/DatabaseStatusIcon'; import RetryConfirmationDialog from './Popups/RetryConfirmation/Index'; import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; +import { useMessageContext } from '../context/UserMessages'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -79,7 +80,7 @@ const Content: React.FC = ({ alertType: 'neutral', alertMessage: '', }); - + const { setClearHistoryData } = useMessageContext(); const { filesData, setFilesData, @@ -94,6 +95,7 @@ const Content: React.FC = ({ queue, processedCount, setProcessedCount, + setPostProcessingVal } = useFileContext(); const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); @@ -160,7 +162,7 @@ const Content: React.FC = ({ (async () => { showNormalToast('Some Q&A functionality will only be available afterwards.'); await postProcessing(userCredentials as UserCredentials, postProcessingTasks); - showSuccessToast('All Q&A functionality is available now.'); + showSuccessToast('All Q&A functionality is available now.'); })(); } }, [processedCount, userCredentials, queue]); @@ -510,9 +512,8 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ - userCredentials?.port ?? 
'7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -522,10 +523,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -540,6 +541,7 @@ const Content: React.FC = ({ setUserCredentials({ uri: '', password: '', userName: '', database: '' }); setSelectedNodes([]); setSelectedRels([]); + setClearHistoryData(true); }; const retryHandler = async (filename: string, retryoption: string) => { @@ -555,12 +557,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - NodesCount: isStartFromBegining ? 0 : f.NodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + NodesCount: isStartFromBegining ? 0 : f.NodesCount, + relationshipCount: isStartFromBegining ? 
0 : f.relationshipCount, + } : f; }); }); @@ -849,9 +851,8 @@ const Content: React.FC = ({ handleGenerateGraph={processWaitingFilesOnRefresh} > diff --git a/frontend/src/components/Graph/CheckboxSelection.tsx b/frontend/src/components/Graph/CheckboxSelection.tsx index b335a4324..738a1dd01 100644 --- a/frontend/src/components/Graph/CheckboxSelection.tsx +++ b/frontend/src/components/Graph/CheckboxSelection.tsx @@ -3,21 +3,23 @@ import React from 'react'; import { CheckboxSectionProps } from '../../types'; import { graphLabels } from '../../utils/Constants'; -const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds }) => ( +const CheckboxSelection: React.FC = ({ graphType, loading, handleChange, isgds, isDocChunk, isEntity }) => (
- handleChange('DocumentChunk')} - /> - handleChange('Entities')} - /> + />)} + {isEntity && ( + handleChange('Entities')} + /> + )} {isgds && ( = ({ graphType.includes('DocumentChunk') && graphType.includes('Entities') ? queryMap.DocChunkEntities : graphType.includes('DocumentChunk') - ? queryMap.DocChunks - : graphType.includes('Entities') - ? queryMap.Entities - : ''; + ? queryMap.DocChunks + : graphType.includes('Entities') + ? queryMap.Entities + : ''; // fit graph to original position const handleZoomToFit = () => { @@ -136,10 +136,10 @@ const GraphViewModal: React.FunctionComponent = ({ const nodeRelationshipData = viewPoint === graphLabels.showGraphView ? await graphQueryAPI( - userCredentials as UserCredentials, - graphQuery, - selectedRows?.map((f) => f.name) - ) + userCredentials as UserCredentials, + graphQuery, + selectedRows?.map((f) => f.name) + ) : await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']); return nodeRelationshipData; } catch (error: any) { @@ -152,9 +152,11 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if (result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes.map((f: Node) => f); - const neoRels = result.data.data.relationships.map((f: Relationship) => f); + const neoNodes = result.data.data.nodes.map((f: Node) => f).filter((node: ExtendedNode) => node.labels.length === 1); + const nodeIds = new Set(neoNodes.map((node:any) => node.element_id)); + const neoRels = result.data.data.relationships.map((f: Relationship) => f).filter((rel: any) => nodeIds.has(rel.end_node_element_id) && nodeIds.has(rel.start_node_element_id)); const { finalNodes, finalRels, schemeVal } = processGraphData(neoNodes, neoRels); + if (mode === 'refreshMode') { initGraph(graphType, finalNodes, finalRels, schemeVal); } else { @@ -246,8 +248,8 @@ const GraphViewModal: React.FunctionComponent = ({ match && viewPoint === graphLabels.showGraphView ? 
100 : match && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -356,8 +358,8 @@ const GraphViewModal: React.FunctionComponent = ({ isActive && viewPoint === graphLabels.showGraphView ? 100 : isActive && viewPoint !== graphLabels.showGraphView - ? 50 - : graphLabels.nodeSize, + ? 50 + : graphLabels.nodeSize, }; }); // deactivating any active relationships @@ -421,7 +423,7 @@ const GraphViewModal: React.FunctionComponent = ({ graphType={graphType} loading={loading} handleChange={handleCheckboxChange} - isgds={allNodes.some((n) => n.labels.includes('__Community__'))} + {...getCheckboxConditions(allNodes)} /> )} diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 07d795484..48e94c023 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -27,7 +27,6 @@ export default function PageLayoutNew({ const [isRightExpanded, setIsRightExpanded] = useState(Boolean(largedesktops)); const [showChatBot, setShowChatBot] = useState(false); const [showDrawerChatbot, setShowDrawerChatbot] = useState(true); - const [clearHistoryData, setClearHistoryData] = useState(false); const [showEnhancementDialog, toggleEnhancementDialog] = useReducer((s) => !s, false); const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); @@ -48,7 +47,7 @@ export default function PageLayoutNew({ } }; - const { messages } = useMessageContext(); + const { messages, setClearHistoryData, clearHistoryData } = useMessageContext(); const { isSchema, setIsSchema, setShowTextFromSchemaDialog, showTextFromSchemaDialog } = useFileContext(); const deleteOnClick = async () => { diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index ee3ef510b..526150db4 100644 --- 
a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -203,7 +203,6 @@ const SideNav: React.FC = ({ {!isChatModalOpen && ( { setchatModeAnchor(e.currentTarget); setshowChatMode(true); diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx index 28114ced7..b48b6c6a6 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/index.tsx @@ -20,7 +20,7 @@ export default function PostProcessingCheckList() { {POST_PROCESSING_JOBS.map((job, idx) => { - const isCreateCommunities = job.title === 'create_communities'; + const isCreateCommunities = job.title === 'enable_communities'; return ( = ({ children }) = const [messages, setMessages] = useState([ { ...chatbotmessages.listMessages[1], datetime: getDateTime() }, ]); + const [clearHistoryData, setClearHistoryData] = useState(false); const value: MessageContextType = { messages, setMessages, + clearHistoryData, + setClearHistoryData }; return {children}; }; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index fbab1b719..7ff7e28c4 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -1,44 +1,9 @@ -import { createContext, useContext, useState, Dispatch, SetStateAction, FC, useEffect } from 'react'; -import { CustomFile, FileContextProviderProps, OptionType } from '../types'; +import { createContext, useContext, useState, FC, useEffect } from 'react'; +import { CustomFile, FileContextProviderProps, FileContextType, OptionType, showTextFromSchemaDialogType } from '../types'; import { chatModeLables, defaultLLM } from '../utils/Constants'; import { useCredentials } from './UserCredentials'; import Queue from '../utils/Queue'; -interface 
showTextFromSchemaDialogType { - triggeredFrom: string; - show: boolean; -} -interface FileContextType { - files: (File | null)[] | []; - filesData: CustomFile[] | []; - setFiles: Dispatch>; - setFilesData: Dispatch>; - model: string; - setModel: Dispatch>; - graphType: string; - setGraphType: Dispatch>; - selectedNodes: readonly OptionType[]; - setSelectedNodes: Dispatch>; - selectedRels: readonly OptionType[]; - setSelectedRels: Dispatch>; - rowSelection: Record; - setRowSelection: React.Dispatch>>; - selectedRows: string[]; - setSelectedRows: React.Dispatch>; - selectedSchemas: readonly OptionType[]; - setSelectedSchemas: Dispatch>; - chatMode: string; - setchatMode: Dispatch>; - isSchema: boolean; - setIsSchema: React.Dispatch>; - showTextFromSchemaDialog: showTextFromSchemaDialogType; - setShowTextFromSchemaDialog: React.Dispatch>; - postProcessingTasks: string[]; - setPostProcessingTasks: React.Dispatch>; - queue: Queue; - setQueue: Dispatch>; - processedCount: number; - setProcessedCount: Dispatch>; -} + const FileContext = createContext(undefined); const FileContextProvider: FC = ({ children }) => { @@ -68,9 +33,11 @@ const FileContextProvider: FC = ({ children }) => { 'materialize_text_chunk_similarities', 'enable_hybrid_search_and_fulltext_search_in_bloom', 'materialize_entity_similarities', - 'create_communities', + 'enable_communities', ]); const [processedCount, setProcessedCount] = useState(0); + const [postProcessingVal, setPostProcessingVal] = useState(false); + useEffect(() => { if (selectedNodeLabelstr != null) { const selectedNodeLabel = JSON.parse(selectedNodeLabelstr); @@ -117,6 +84,8 @@ const FileContextProvider: FC = ({ children }) => { setQueue, processedCount, setProcessedCount, + postProcessingVal, + setPostProcessingVal }; return {children}; }; diff --git a/frontend/src/services/ChunkEntitiesInfo.ts b/frontend/src/services/ChunkEntitiesInfo.ts index f69ddd3aa..39d9f8c2e 100644 --- a/frontend/src/services/ChunkEntitiesInfo.ts +++ 
b/frontend/src/services/ChunkEntitiesInfo.ts @@ -1,20 +1,22 @@ -import { ChatInfo_APIResponse, UserCredentials } from '../types'; +import { ChatInfo_APIResponse, nodeDetailsProps, UserCredentials } from '../types'; import api from '../API/Index'; const chunkEntitiesAPI = async ( userCredentials: UserCredentials, - chunk_ids: string, database: string = 'neo4j', - is_entity: boolean = false + nodeDetails: (nodeDetailsProps), + entities:(string)[], + mode: string, ) => { try { const formData = new FormData(); formData.append('uri', userCredentials?.uri ?? ''); formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); - formData.append('chunk_ids', chunk_ids); formData.append('database', database); - formData.append('is_entity', String(is_entity)); + formData.append('nodedetails', JSON.stringify(nodeDetails)); + formData.append('entities', JSON.stringify(entities)); + formData.append('mode', mode); const response: ChatInfo_APIResponse = await api.post(`/chunk_entities`, formData, { headers: { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 10f16e4d6..b280cc5d6 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -5,6 +5,7 @@ import { OverridableStringUnion } from '@mui/types'; import type { Node, Relationship } from '@neo4j-nvl/base'; import { NonOAuthError } from '@react-oauth/google'; import { BannerType } from '@neo4j-ndl/react'; +import Queue from './utils/Queue'; export interface CustomFileBase extends Partial { processing: number | string; @@ -201,7 +202,7 @@ export interface Source { source_name: string; start_time?: string; } -export interface chunk { +export interface ChunkDetail { id: string; score: number; } @@ -215,7 +216,8 @@ export interface Messages { model?: string; isLoading?: boolean; response_time?: number; - chunk_ids?: chunk[]; + nodeDetails?: nodeDetailsProps; + chunk_ids?: string[]; total_tokens?: number; speaking?: boolean; copying?: boolean; @@ 
-223,7 +225,7 @@ export interface Messages { cypher_query?: string; graphonly_entities?: []; error?: string; - entities?: chunk[]; + entities?: string[]; } export type ChatbotProps = { @@ -234,7 +236,7 @@ export type ChatbotProps = { isFullScreen?: boolean; connectionStatus: boolean; }; -export interface WikipediaModalTypes extends Omit {} +export interface WikipediaModalTypes extends Omit { } export interface GraphViewModalProps { open: boolean; @@ -257,6 +259,8 @@ export interface CheckboxSectionProps { loading: boolean; handleChange: (graph: GraphType) => void; isgds: boolean; + isDocChunk: boolean; + isEntity: boolean; } export interface fileName { @@ -414,12 +418,13 @@ export interface chatInfoMessage extends Partial { sources: string[]; model: string; response_time: number; - chunk_ids: chunk[]; total_tokens: number; mode: string; cypher_query?: string; graphonly_entities: []; error: string; + entities_ids: string[]; + nodeDetails: nodeDetailsProps; } export interface eventResponsetypes extends Omit { @@ -471,6 +476,7 @@ export type Community = { weight: number; level: number; community_rank: number; + score?: number; }; export type GroupedEntity = { texts: Set; @@ -639,6 +645,8 @@ export interface ContextProps { export interface MessageContextType { messages: Messages[] | []; setMessages: Dispatch>; + clearHistoryData: boolean; + setClearHistoryData: Dispatch> } export interface DatabaseStatusProps { @@ -670,3 +678,63 @@ export type CommunitiesProps = { loading: boolean; communities: Community[]; }; + +export interface entity { + id: string; + score: number; +}; + +export interface community { + id: string; + score: number; +} + +export interface nodeDetailsProps { + chunkdetails?: ChunkDetail[], + entitydetails?: entity[], + communitydetails?: community[] +} + +export type entityProps = { + entityids: [], + relationshipids: [] +} + +export interface showTextFromSchemaDialogType { + triggeredFrom: string; + show: boolean; +} +export interface FileContextType 
{ + files: (File | null)[] | []; + filesData: CustomFile[] | []; + setFiles: Dispatch>; + setFilesData: Dispatch>; + model: string; + setModel: Dispatch>; + graphType: string; + setGraphType: Dispatch>; + selectedNodes: readonly OptionType[]; + setSelectedNodes: Dispatch>; + selectedRels: readonly OptionType[]; + setSelectedRels: Dispatch>; + rowSelection: Record; + setRowSelection: React.Dispatch>>; + selectedRows: string[]; + setSelectedRows: React.Dispatch>; + selectedSchemas: readonly OptionType[]; + setSelectedSchemas: Dispatch>; + chatMode: string; + setchatMode: Dispatch>; + isSchema: boolean; + setIsSchema: React.Dispatch>; + showTextFromSchemaDialog: showTextFromSchemaDialogType; + setShowTextFromSchemaDialog: React.Dispatch>; + postProcessingTasks: string[]; + setPostProcessingTasks: React.Dispatch>; + queue: Queue; + setQueue: Dispatch>; + processedCount: number; + setProcessedCount: Dispatch>; + postProcessingVal: boolean; + setPostProcessingVal: Dispatch>; +} \ No newline at end of file diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 08948ba87..b55c43817 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -75,6 +75,7 @@ export const chatModeLables = { entity_vector: 'entity search+vector', unavailableChatMode: 'Chat mode is unavailable when rows are selected', selected: 'Selected', + global_vector: 'global search+vector+fulltext' }; export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' @@ -107,6 +108,10 @@ export const chatModes = mode: chatModeLables.entity_vector, description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', }, + { + mode : chatModeLables.global_vector, + description: 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.' + } ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? 
parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; @@ -234,8 +239,8 @@ export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [ performing similarity-based searches.`, }, { - title: 'create_communities', - description: 'Create Communities identifies and groups similar entities, improving search accuracy and analysis.', + title: 'enable_communities', + description: 'Enable community creation across entities to use GraphRAG capabilities both local and global search.', }, ]; export const RETRY_OPIONS = [ diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 97e7bda07..18fb69105 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -178,6 +178,7 @@ export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRela }; }); const finalNodes = newNodes.flat(); + // Process relationships const newRels: Relationship[] = neoRels.map((relations: any) => { return { id: relations.element_id, @@ -213,6 +214,7 @@ export const filterData = ( (type) => type !== 'Document' && type !== 'Chunk' && type !== '__Community__' ); // Only Document + Chunk + // const processedEntities = entityTypes.flatMap(item => item.includes(',') ? 
item.split(',') : item);
   if (
     graphType.includes('DocumentChunk') &&
     !graphType.includes('Entities') &&
@@ -245,6 +247,7 @@ export const filterData = (
         nodeIds.has(rel.to)
     );
     filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme;
+    console.log('labels', entityNodes);
     // Only Communities
   } else if (
     graphType.includes('Communities') &&
@@ -336,6 +339,7 @@ export const filterData = (
     filteredNodes = allNodes;
     filteredRelations = allRelationships;
     filteredScheme = scheme;
+    console.log('entity', filteredScheme);
   }
   return { filteredNodes, filteredRelations, filteredScheme };
 };
@@ -428,6 +432,8 @@ export const getDescriptionForChatMode = (mode: string): string => {
       return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.';
     case chatModeLables.entity_vector:
       return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.';
+    case chatModeLables.global_vector:
+      return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.';
     default:
       return 'Chat mode description not available'; // Fallback description
   }
@@ -469,3 +475,10 @@ export function isAllowedHost(url: string, allowedHosts: string[]) {
     return false;
   }
 }
+
+export const getCheckboxConditions = (allNodes: ExtendedNode[]) => { // label-presence flags computed over allNodes
+  const isDocChunk = allNodes.some((n) => n.labels?.includes('Document')); // true if any node carries 'Document'
+  const isEntity = allNodes.some((n) => !n.labels?.includes('Document') && !n.labels?.includes('Chunk')); // neither label; `||` matched any node lacking just one
+  const isgds = allNodes.some((n) => n.labels?.includes('__Community__')); // true if any node carries '__Community__'
+  return { isDocChunk, isEntity, isgds };
+};