From 2b12131bd4bfba1ad59a13f4abb70beb6dfaf50e Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Fri, 27 Dec 2024 14:59:09 +0530 Subject: [PATCH 01/13] Update README.md --- README.md | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 8c8221bc7..a252acb7e 100644 --- a/README.md +++ b/README.md @@ -35,27 +35,8 @@ Accoroding to enviornment we are configuring the models which is indicated by VI EX: ```env VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" -``` -According to the environment, we are configuring the models which indicated by VITE_LLM_MODELS_PROD variable we can configure models based on our needs. -EX: -```env -VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" -``` -if you only want OpenAI: -```env -VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" -VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" -OPENAI_API_KEY="your-openai-key" ``` - -if you only want Diffbot: -```env -VITE_LLM_MODELS_PROD="diffbot" -VITE_LLM_MODELS_PROD="diffbot" -DIFFBOT_API_KEY="your-diffbot-key" -``` - You can then run Docker Compose to build and start all components: ```bash docker-compose up --build @@ -88,7 +69,6 @@ VITE_CHAT_MODES="" If however you want to specify the only vector mode or only graph mode you can do that by specifying the mode in the env: ```env VITE_CHAT_MODES="vector,graph" -VITE_CHAT_MODES="vector,graph" ``` #### Running Backend and Frontend separately (dev environment) @@ -105,7 +85,7 @@ Alternatively, you can run the backend and frontend separately: ``` - For the backend: -1. Create the backend/.env file by copy/pasting the backend/example.env. To streamline the initial setup and testing of the application, you can preconfigure user credentials directly within the .env file. This bypasses the login dialog and allows you to immediately connect with a predefined user. +1. Create the backend/.env file by copy/pasting the backend/example.env. To streamline the initial setup and testing of the application, you can preconfigure user credentials directly within the backend .env file. This bypasses the login dialog and allows you to immediately connect with a predefined user. - **NEO4J_URI**: - **NEO4J_USERNAME**: - **NEO4J_PASSWORD**: @@ -139,6 +119,8 @@ Allow unauthenticated request : Yes ## ENV | Env Variable Name | Mandatory/Optional | Default Value | Description | |-------------------------|--------------------|---------------|--------------------------------------------------------------------------------------------------| +| | +| **BACKEND ENV** | EMBEDDING_MODEL | Optional | all-MiniLM-L6-v2 | Model for generating the text embedding (all-MiniLM-L6-v2 , openai , vertexai) | | IS_EMBEDDING | Optional | true | Flag to enable text embedding | | KNN_MIN_SCORE | Optional | 0.94 | Minimum score for KNN algorithm | @@ -152,7 +134,13 @@ Allow unauthenticated request : Yes | LANGCHAIN_API_KEY | Optional | | API key for Langchain | | LANGCHAIN_PROJECT | Optional | | Project for Langchain | | LANGCHAIN_TRACING_V2 | Optional | true | Flag to enable Langchain tracing | +| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | | LANGCHAIN_ENDPOINT | Optional | https://api.smith.langchain.com | Endpoint for Langchain API | +| ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | +| LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | +| RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | +| | +| **FRONTEND ENV** | VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | | VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | | VITE_REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available | @@ -163,10 +151,6 @@ Allow unauthenticated request : Yes | VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | | VITE_LLM_MODELS_PROD | Optional | openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash | To Distinguish models based on the Enviornment PROD or DEV | VITE_LLM_MODELS | Optional | 'diffbot,openai_gpt_3.5,openai_gpt_4o,openai_gpt_4o_mini,gemini_1.5_pro,gemini_1.5_flash,azure_ai_gpt_35,azure_ai_gpt_4o,ollama_llama3,groq_llama3_70b,anthropic_claude_3_5_sonnet' | Supported Models For the application -| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally | -| ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | -| LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | -| RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | ## LLMs Supported 1. OpenAI From e44ca29710728f15a82d91a84af7c9ea5e22818c Mon Sep 17 00:00:00 2001 From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:31:21 +0530 Subject: [PATCH 02/13] Update frontend_docs.adoc --- docs/frontend/frontend_docs.adoc | 41 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/docs/frontend/frontend_docs.adoc b/docs/frontend/frontend_docs.adoc index 34e71f254..f4d27e0ad 100644 --- a/docs/frontend/frontend_docs.adoc +++ b/docs/frontend/frontend_docs.adoc @@ -16,17 +16,20 @@ This document provides a comprehensive guide for developers on how we build a Re == Folders . 
+ ├── API + ├── Assets ├── Components | ├─ ChatBot + | | ├─ Chatbot | | ├─ ChatInfoModal - | | ├─ ChatModeToggle - | | ├─ ExpandedChatButtonContainer | | ├─ ChatModesSwitch + | | ├─ ChatModeToggle | | ├─ ChatOnlyComponent | | ├─ ChatInfo | | ├─ CommonChatActions | | ├─ CommunitiesInfo | | ├─ EntitiesInfo + | | ├─ ExpandedChatButtonContainer | | ├─ MetricsCheckbox | | ├─ MetricsTab | | ├─ MultiModeMetrics @@ -64,12 +67,15 @@ This document provides a comprehensive guide for developers on how we build a Re | ├─ UI | | ├─ Alert | | ├─ ButtonWithTooltip + | | |─ BreakDownPopOver | | ├─ CustomButton | | ├─ CustomCheckBox | | ├─ CustomMenu + | | ├─ CustomPopOver | | ├─ CustomProgressBar | | ├─ DatabaseIcon | | ├─ DatabaseStatusIcon + | | ├─ Dropdown | | ├─ ErrorBoundary | | ├─ FallBackDialog | | ├─ HoverableLink @@ -86,7 +92,6 @@ This document provides a comprehensive guide for developers on how we build a Re | | ├─ GenericSourceButton | | ├─ GenericSourceModal | ├─ Content - | ├─ Dropdown | ├─ FileTable | ├─ QuickStarter ├── HOC @@ -103,6 +108,9 @@ This document provides a comprehensive guide for developers on how we build a Re | ├─ UserCredentials | ├─ UserMessages | ├─ UserFiles + ├── HOC + | ├─ CustomModal + | ├─ WithVisibility ├── Hooks | ├─ useSourceInput | ├─ useSpeech @@ -141,6 +149,12 @@ Created a connection modal by adding details including protocol, URI, database n image::images/ConnectionModal.jpg[NoConnection, 600] * After connection: + * ** Aura DS Connection ** + +image::images/NoFiles.jpg[Connection, 600] + + + * ** Aura DB connection ** image::images/NoFiles.jpg[Connection, 600] @@ -192,17 +206,25 @@ image::images/GenerateGraph.jpg[Generate Graph, 600] == 5. Graph Generation: -* Created a component for generating graphs based on the files in the table, to extract nodes and relationships. When the user clicks on the Preview Graph or on the Table View icon the user can see that the graph model holds three options for viewing: Lexical Graph, Entity Graph and Knowledge Graph. We utilized Neo4j's graph library to visualize the extracted nodes and relationships in the form of a graph query API: ***/graph_query***. There are options for customizing the graph visualization such as layout algorithms [zoom in, zoom out, fit, refresh], node styling, relationship types. + +* ***/graph_query:*** + *** Created a component for generating graphs based on the files in the table, to extract nodes and relationships. When the user clicks on the Preview Graph or on the Table View icon the user can see that the graph model holds three options for viewing: Lexical Graph, Entity Graph and Knowledge Graph. We utilized Neo4j's graph library to visualize the extracted nodes and relationships in the form of a graph query API: ***/graph_query***. There are options for customizing the graph visualization such as layout algorithms [zoom in, zoom out, fit, refresh], node styling, relationship types. image::images/KnowledgeGraph.jpg[Knowledge Graph, 600] image::images/EntityGraph.jpg[Entity Graph, 600] image::images/EntityGraph.jpg[Entity Graph, 600] +* ***/get_neighbours:*** + ** This API is used to retrive the neighbor nodes of the given element id of the node. + == 6. Chatbot: * Created a Chatbot Component which has state variables to manage user input and chat messages. Once the user asks the question and clicks on the Ask button API: ***/chatbot*** is triggered to send user input to the backend and receive the response. 
The chat also has options for users to see more details about the chat, text to speech and copy the response.

image::images/ChatResponse.jpg[ChatResponse, 600]

+* ***/clear_chat_bot:***
+  ** to clear the chat history which is saved in Neo4j DB.
+
* ***/chunk_entities:***
  ** to fetch the number of sources, entities and chunks

@@ -219,7 +241,16 @@
image::images/EntitiesInfo.jpg[EntitiesInfo, 600]

image::images/ChunksInfo.jpg[ChunksInfo, 600]

-* There are three modes ***Vector***, ***Graph***, ***Graph+Vector*** that can be provided to the chat to retrieve the answers.
+* ***/metric:***
+  ** The API responsible for evaluating chatbot responses on the basis of metrics such as faithfulness and answer relevancy. It utilises the RAGAS library to calculate these metrics.
+
+* ***/additional_metrics:***
+  ** The API responsible for evaluating chatbot responses on the basis of further metrics such as context entity recall, semantic score and ROUGE score. This requires additional ground truth to be supplied by the user. It utilises the RAGAS library to calculate these metrics.
+
+* There are several chat modes, including ***Vector***, ***Fulltext***, ***Graph+Vector+Fulltext*** and ***Entity search+Vector***, that can be provided to the chat to retrieve the answers.
+* One more mode, ***Global search+Vector+Fulltext***, is available when the connected Aura instance supports ***GDS***.

image::images/ChatModes.jpg[ChatModes, 600]

From 05dd885a831850e1347b5c0a5c68d2ff8c052ef2 Mon Sep 17 00:00:00 2001
From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Date: Mon, 6 Jan 2025 14:55:16 +0530
Subject: [PATCH 03/13] Update frontend_docs.adoc

---
 docs/frontend/frontend_docs.adoc | 110 ++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 3 deletions(-)

diff --git a/docs/frontend/frontend_docs.adoc b/docs/frontend/frontend_docs.adoc
index f4d27e0ad..6120b08c5 100644
--- a/docs/frontend/frontend_docs.adoc
+++ b/docs/frontend/frontend_docs.adoc
@@ -360,7 +360,7 @@ Followed best practices for optimizing performance and security of the deployed
== 10. API Reference

-----
-POST /connect
+1) POST /connect
-----

Neo4j database connection on frontend is done with this API.
@@ -374,7 +374,7 @@ Neo4j database connection on frontend is done with this API.
=== Upload Files from Local

----
-POST /upload
+2) POST /upload
----

The upload endpoint is designed to handle the uploading of large files by breaking them into smaller chunks. This method ensures that large files can be uploaded efficiently without overloading the server.
@@ -394,7 +394,7 @@ The upload endpoint is designed to handle the uploading of large files by breaki
=== User Defined Schema

----
-POST /schema
+3) POST /schema
----

User can set schema for graph generation (i.e. Nodes and relationship labels) in settings panel or get existing db schema through this API.
@@ -586,6 +586,23 @@ This API is used to view graph for a particular file.
* `query_type`= Neo4j database name
* `document_names` = File name for which user wants to view graph

....

=== Get neighbour nodes
----
POST /get_neighbours
----

This API is used to retrieve the neighbor nodes of the given element id of the node.
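For illustration only, here is a minimal sketch of how the frontend could call this endpoint, assuming it accepts multipart form-data fields named exactly as in the parameter list below; the response is assumed to be JSON and should be verified against the running backend:

[source,typescript]
----
// Hypothetical helper: posts the documented fields to /get_neighbours.
// Connection values and the response shape are assumptions for illustration.
export async function fetchNeighbours(
  backendUrl: string, // e.g. the value of VITE_BACKEND_API_URL
  connection: { uri: string; userName: string; password: string; database: string },
  elementId: string
) {
  const formData = new FormData();
  formData.append('uri', connection.uri);
  formData.append('userName', connection.userName);
  formData.append('password', connection.password);
  formData.append('database', connection.database);
  formData.append('elementId', elementId);

  const response = await fetch(`${backendUrl}/get_neighbours`, { method: 'POST', body: formData });
  if (!response.ok) {
    throw new Error(`get_neighbours failed with HTTP ${response.status}`);
  }
  return response.json();
}
----

The same form-data pattern applies to most of the other POST endpoints documented in this section.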
**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name,
* `elementId` = Element id of the node to retrieve its neighbours

=== SSE event to update processing status
----
GET /update_extract_status
----
@@ -663,6 +680,93 @@ The API is used to delete unconnected entities from database.
* `database`= Neo4j database name,
* `unconnected_entities_list`=selected entities list to delete of unconnected entities.

....

=== Get the list of orphan nodes
----
POST /get_unconnected_nodes_list
----

The API retrieves a list of nodes in the graph database that are not connected to any other nodes.

**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name

=== Get duplicate nodes
----
POST /get_duplicate_nodes
----

The API is used to fetch duplicate entities from the database.

**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name,

....

=== Merge duplicate nodes
----
POST /merge_duplicate_nodes
----

The API is used to merge the duplicate entities selected by the user in the database.

**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name,
* `duplicate_nodes_list`= list of selected entities to be merged with their similar entities.

....
=== Drop and create vector index
----
POST /drop_create_vector_index
----

The API is used to drop and re-create the vector index when the vector index dimensions are different.

**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name,
* `isVectorIndexExist`= True or False based on whether the vector index exists in the database,

=== Reprocessing of sources
----
POST /retry_processing
----

This API is used to mark cancelled, completed or failed file sources as 'Ready to Reprocess'.
Users have 3 options for reprocessing a file:

* Start from beginning - the file will be processed again from the beginning, i.e. from the 1st chunk.
* Delete entities and start from beginning - if the file source has already been processed and has any existing nodes and relationships, those will be deleted and the file will be reprocessed from the 1st chunk.
* Start from last processed position - cancelled or failed files will be processed from the last successfully processed chunk position. This option is not available for completed files.

Once the status is set to 'Ready to Reprocess', the user can again click on Generate graph to process the file for knowledge graph creation.

**API Parameters :**

* `uri`=Neo4j uri,
* `userName`= Neo4j db username,
* `password`= Neo4j db password,
* `database`= Neo4j database name,
* `file_name`= Name of the file which the user wants to reprocess.
* `retry_condition` = One of the above 3 conditions which is selected for reprocessing.

(A request sketch for this endpoint is included after the Conclusion below.)

== 11. Conclusion:
In conclusion, this technical document outlines the process of building a React application with Neo4j Aura integration for graph database functionalities.
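To make the reprocessing flow documented above concrete, here is a hedged sketch of a client call to ***/retry_processing***. The `RetryCondition` string values are assumptions derived from the three options described in that section and may differ from the backend's actual constants:

[source,typescript]
----
// Illustrative only: verify the accepted retry_condition strings against the deployed API.
type RetryCondition =
  | 'start_from_beginning'
  | 'delete_entities_and_start_from_beginning'
  | 'start_from_last_processed_position';

export async function retryProcessing(
  backendUrl: string,
  connection: { uri: string; userName: string; password: string; database: string },
  fileName: string,
  retryCondition: RetryCondition
) {
  const formData = new FormData();
  formData.append('uri', connection.uri);
  formData.append('userName', connection.userName);
  formData.append('password', connection.password);
  formData.append('database', connection.database);
  formData.append('file_name', fileName);
  formData.append('retry_condition', retryCondition);

  const response = await fetch(`${backendUrl}/retry_processing`, { method: 'POST', body: formData });
  if (!response.ok) {
    throw new Error(`retry_processing failed with HTTP ${response.status}`);
  }
  return response.json();
}
----

For example, `retryProcessing(backendUrl, connection, 'report.pdf', 'start_from_last_processed_position')` would resume a failed file from its last successfully processed chunk.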
From 0ed151564e059dabfb322051ac0e5d3066202d29 Mon Sep 17 00:00:00 2001
From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Date: Tue, 7 Jan 2025 05:56:55 +0000
Subject: [PATCH 04/13] folder structure

---
 frontend/src/HOC/WithVisibility.tsx           | 13 ++++
 .../src/components/UI/BreakDownPopOver.tsx    | 26 +++++++
 frontend/src/components/UI/Dropdown.tsx       | 74 +++++++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 frontend/src/HOC/WithVisibility.tsx
 create mode 100644 frontend/src/components/UI/BreakDownPopOver.tsx
 create mode 100644 frontend/src/components/UI/Dropdown.tsx

diff --git a/frontend/src/HOC/WithVisibility.tsx b/frontend/src/HOC/WithVisibility.tsx
new file mode 100644
index 000000000..8b3dd53af
--- /dev/null
+++ b/frontend/src/HOC/WithVisibility.tsx
@@ -0,0 +1,13 @@
+import { VisibilityProps } from "../types";
+
+export function withVisibility<P>(WrappedComponent: React.ComponentType<P>) {
+  const VisibityControlled = (props: P & VisibilityProps) => {
+    if (props.isVisible === false) {
+      return null;
+    }
+
+    return <WrappedComponent {...props} />;
+  };
+
+  return VisibityControlled;
+}
diff --git a/frontend/src/components/UI/BreakDownPopOver.tsx b/frontend/src/components/UI/BreakDownPopOver.tsx
new file mode 100644
index 000000000..ab3e58941
--- /dev/null
+++ b/frontend/src/components/UI/BreakDownPopOver.tsx
@@ -0,0 +1,26 @@
+import CustomPopOver from './CustomPopOver';
+import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons';
+import { CustomFileBase } from '../../types';
+import { useCredentials } from '../../context/UserCredentials';
+
+export default function BreakDownPopOver({ file, isNodeCount = true }: { file: CustomFileBase; isNodeCount: boolean }) {
+  const { isGdsActive } = useCredentials();
+
+  return (
+    <CustomPopOver trigger={<InformationCircleIconOutline />}>
+      {/* The markup below assumes CustomFileBase exposes per-type node and relationship counts. */}
+      {isNodeCount ? (
+        <ul>
+          <li>Chunk Nodes: {file.chunkNodeCount}</li>
+          <li>Entity Nodes: {file.entityNodeCount}</li>
+          {isGdsActive && <li>Community Nodes: {file.communityNodeCount}</li>}
+        </ul>
+      ) : (
+        <ul>
+          <li>Chunk Relations: {file.chunkRelCount}</li>
+          <li>Entity Relations: {file.entityEntityRelCount}</li>
+          {isGdsActive && <li>Community Relations: {file.communityRelCount}</li>}
+        </ul>
+      )}
+    </CustomPopOver>
+  );
+}
diff --git a/frontend/src/components/UI/Dropdown.tsx b/frontend/src/components/UI/Dropdown.tsx
new file mode 100644
index 000000000..4e6da8436
--- /dev/null
+++ b/frontend/src/components/UI/Dropdown.tsx
@@ -0,0 +1,74 @@
+import { Tooltip, useMediaQuery, Select } from '@neo4j-ndl/react';
+import { OptionType, ReusableDropdownProps } from '../../types';
+import { memo, useMemo } from 'react';
+import { capitalize, capitalizeWithUnderscore } from '../../utils/Utils';
+import { prodllms } from '../../utils/Constants';
+const DropdownComponent: React.FC<ReusableDropdownProps> = ({
+  options,
+  placeholder,
+  defaultValue,
+  onSelect,
+  children,
+  view,
+  isDisabled,
+  value,
+}) => {
+  const isProdEnv = process.env.VITE_ENV === 'PROD';
+  const isLargeDesktop = useMediaQuery(`(min-width:1440px )`);
+  const handleChange = (selectedOption: OptionType | null | void) => {
+    onSelect(selectedOption);
+    const existingModel = localStorage.getItem('selectedModel');
+    if (existingModel != selectedOption?.value) {
+      localStorage.setItem('selectedModel', selectedOption?.value ?? '');
+    }
+  };
+  const allOptions = useMemo(() => options, [options]);
+  return (
+    <>
+