Skip to content

Commit e1d0b4f

Browse files
User flow changes for recreating supported vector index (#682)
* Removed the if check * Added one more check for creating the vector index when chunks exist without embeddings * Removed local files * Condition changes * Chunks-exist check * Chunks-exist-without-embeddings check * Vector index issue fixed * Vector index with different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <[email protected]>
1 parent 3a37b4e commit e1d0b4f

File tree

8 files changed

+132
-67
lines changed

8 files changed

+132
-67
lines changed

backend/score.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -611,11 +611,11 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da
611611
gc.collect()
612612

613613
@app.post("/drop_create_vector_index")
614-
async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), is_vector_index_recreate=Form()):
614+
async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()):
615615
try:
616616
graph = create_graph_database_connection(uri, userName, password, database)
617617
graphDb_data_Access = graphDBdataAccess(graph)
618-
result = graphDb_data_Access.drop_create_vector_index(is_vector_index_recreate)
618+
result = graphDb_data_Access.drop_create_vector_index(isVectorIndexExist)
619619
return create_api_response('Success',message=result)
620620
except Exception as e:
621621
job_status = "Failed"

backend/src/graphDB_dataAccess.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,34 @@ def connection_check_and_get_vector_dimensions(self):
158158
WHERE type = 'VECTOR' AND name = 'vector'
159159
RETURN options.indexConfig['vector.dimensions'] AS vector_dimensions
160160
""")
161+
162+
result_chunks = self.graph.query("""match (c:Chunk) return size(c.embedding) as embeddingSize, count(*) as chunks,
163+
count(c.embedding) as hasEmbedding
164+
""")
165+
161166
embedding_model = os.getenv('EMBEDDING_MODEL')
162167
embeddings, application_dimension = load_embedding_model(embedding_model)
163168
logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}')
169+
# print(chunks_exists)
164170

165171
if self.graph:
166172
if len(db_vector_dimension) > 0:
167173
return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful"}
168174
else:
169-
logging.info("Vector index does not exist in database")
170-
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful"}
175+
if len(db_vector_dimension) == 0 and len(result_chunks) == 0:
176+
logging.info("Chunks and vector index does not exists in database")
177+
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False}
178+
elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0:
179+
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True}
180+
else:
181+
return {'message':"Connection Successful"}
171182

172183
def execute_query(self, query, param=None):
173184
return self.graph.query(query, param)
174185

175186
def get_current_status_document_node(self, file_name):
176187
query = """
177-
MATCH(d:Document {fileName : $file_name}) RETURN d.status AS Status , d.processingTime AS processingTime,
188+
MATCH(d:Document {fileName : $file_name}) RETURN d.stats AS Status , d.processingTime AS processingTime,
178189
d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount,
179190
d.total_pages AS total_pages, d.total_chunks AS total_chunks , d.fileSize as fileSize,
180191
d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource
@@ -322,15 +333,16 @@ def merge_duplicate_nodes(self,duplicate_nodes_list):
322333
param = {"rows":nodes_list}
323334
return self.execute_query(query,param)
324335

325-
def drop_create_vector_index(self, is_vector_index_recreate):
336+
def drop_create_vector_index(self, isVectorIndexExist):
326337
"""
327338
drop and create the vector index when vector index dimesion are different.
328339
"""
329340
embedding_model = os.getenv('EMBEDDING_MODEL')
330341
embeddings, dimension = load_embedding_model(embedding_model)
331-
if is_vector_index_recreate == 'true':
332-
self.graph.query("""drop index vector""")
333342

343+
if isVectorIndexExist == 'true':
344+
self.graph.query("""drop index vector""")
345+
# self.graph.query("""drop index vector""")
334346
self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding)
335347
OPTIONS {indexConfig: {
336348
`vector.dimensions`: $dimensions,
@@ -341,4 +353,4 @@ def drop_create_vector_index(self, is_vector_index_recreate):
341353
"dimensions" : dimension
342354
}
343355
)
344-
return "Drop and Re-Create vector index succesfully"
356+
return "Drop and Re-Create vector index succesfully"

frontend/src/components/Content.tsx

+19-7
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@ const Content: React.FC<ContentProps> = ({
4848
const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`);
4949
const [init, setInit] = useState<boolean>(false);
5050
const [openConnection, setOpenConnection] = useState<connectionState>({
51-
isvectorIndexMatch: true,
5251
openPopUp: false,
53-
novectorindexInDB: true,
52+
chunksExists: false,
53+
vectorIndexMisMatch: false,
54+
chunksExistsWithDifferentDimension: false,
5455
});
5556
const [openGraphView, setOpenGraphView] = useState<boolean>(false);
5657
const [inspectedName, setInspectedName] = useState<string>('');
@@ -594,14 +595,24 @@ const Content: React.FC<ContentProps> = ({
594595
userDbVectorIndex: response.data.data.db_vector_dimension,
595596
})
596597
);
597-
if (response.data.data.application_dimension === response.data.data.db_vector_dimension) {
598+
if (
599+
(response.data.data.application_dimension === response.data.data.db_vector_dimension ||
600+
response.data.data.db_vector_dimension == 0) &&
601+
!response.data.data.chunks_exists
602+
) {
598603
setConnectionStatus(true);
599604
setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
600605
} else {
601606
setOpenConnection({
602-
isvectorIndexMatch: false,
603607
openPopUp: true,
604-
novectorindexInDB: response.data.data.db_vector_dimension === 0,
608+
chunksExists: response.data.data.chunks_exists as boolean,
609+
vectorIndexMisMatch:
610+
response.data.data.db_vector_dimension > 0 &&
611+
response.data.data.db_vector_dimension != response.data.data.application_dimension,
612+
chunksExistsWithDifferentDimension:
613+
response.data.data.db_vector_dimension > 0 &&
614+
response.data.data.db_vector_dimension != response.data.data.application_dimension &&
615+
(response.data.data.chunks_exists ?? true),
605616
});
606617
setConnectionStatus(false);
607618
}
@@ -716,8 +727,9 @@ const Content: React.FC<ContentProps> = ({
716727
open={openConnection.openPopUp}
717728
setOpenConnection={setOpenConnection}
718729
setConnectionStatus={setConnectionStatus}
719-
isVectorIndexMatch={openConnection.isvectorIndexMatch}
720-
noVectorIndexFound={openConnection.novectorindexInDB}
730+
isVectorIndexMatch={openConnection.vectorIndexMisMatch}
731+
chunksExistsWithoutEmbedding={openConnection.chunksExists}
732+
chunksExistsWithDifferentEmbedding={openConnection.chunksExistsWithDifferentDimension}
721733
/>
722734
</Suspense>
723735

frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx

+78-42
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ export default function ConnectionModal({
1313
setOpenConnection,
1414
setConnectionStatus,
1515
isVectorIndexMatch,
16-
noVectorIndexFound,
16+
chunksExistsWithoutEmbedding,
17+
chunksExistsWithDifferentEmbedding,
1718
}: ConnectionModalProps) {
1819
let prefilledconnection = localStorage.getItem('neo4j.connection');
1920
let initialuri;
@@ -98,20 +99,21 @@ export default function ConnectionModal({
9899
[userCredentials, userDbVectorIndex]
99100
);
100101
useEffect(() => {
101-
if (!isVectorIndexMatch) {
102+
if (isVectorIndexMatch || chunksExistsWithoutEmbedding) {
102103
setMessage({
103104
type: 'danger',
104105
content: (
105106
<VectorIndexMisMatchAlert
106107
vectorIndexLoading={vectorIndexLoading}
107-
recreateVectorIndex={() => recreateVectorIndex(!noVectorIndexFound)}
108-
isVectorIndexAlreadyExists={!noVectorIndexFound}
108+
recreateVectorIndex={() => recreateVectorIndex(chunksExistsWithDifferentEmbedding)}
109+
isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch}
109110
userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 'null').userDbVectorIndex}
111+
chunksExists={chunksExistsWithoutEmbedding}
110112
/>
111113
),
112114
});
113115
}
114-
}, [isVectorIndexMatch, vectorIndexLoading, noVectorIndexFound]);
116+
}, [isVectorIndexMatch, vectorIndexLoading, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]);
115117

116118
const parseAndSetURI = (uri: string, urlparams = false) => {
117119
const uriParts: string[] = uri.split('://');
@@ -189,46 +191,80 @@ export default function ConnectionModal({
189191
const connectionURI = `${protocol}://${URI}${URI.split(':')[1] ? '' : `:${port}`}`;
190192
setUserCredentials({ uri: connectionURI, userName: username, password: password, database: database, port: port });
191193
setIsLoading(true);
192-
const response = await connectAPI(connectionURI, username, password, database);
193-
if (response?.data?.status === 'Success') {
194-
setUserDbVectorIndex(response.data.data.db_vector_dimension);
195-
if (response.data.data.db_vector_dimension === response.data.data.application_dimension) {
196-
setConnectionStatus(true);
197-
setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
198-
setMessage({
199-
type: 'success',
200-
content: response.data.data.message,
201-
});
194+
try {
195+
const response = await connectAPI(connectionURI, username, password, database);
196+
setIsLoading(false);
197+
if (response?.data?.status !== 'Success') {
198+
throw new Error(response.data.error);
202199
} else {
203-
setMessage({
204-
type: 'danger',
205-
content: (
206-
<VectorIndexMisMatchAlert
207-
vectorIndexLoading={vectorIndexLoading}
208-
recreateVectorIndex={() => recreateVectorIndex(response.data.data.db_vector_dimension != 0)}
209-
isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0}
210-
userVectorIndexDimension={response.data.data.db_vector_dimension}
211-
/>
212-
),
213-
});
200+
setUserDbVectorIndex(response.data.data.db_vector_dimension);
201+
if (
202+
(response.data.data.application_dimension === response.data.data.db_vector_dimension ||
203+
response.data.data.db_vector_dimension == 0) &&
204+
!response.data.data.chunks_exists
205+
) {
206+
setConnectionStatus(true);
207+
setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
208+
setMessage({
209+
type: 'success',
210+
content: response.data.data.message,
211+
});
212+
} else if ((response.data.data.chunks_exists ?? true) && response.data.data.db_vector_dimension == 0) {
213+
setMessage({
214+
type: 'danger',
215+
content: (
216+
<VectorIndexMisMatchAlert
217+
vectorIndexLoading={vectorIndexLoading}
218+
recreateVectorIndex={() =>
219+
recreateVectorIndex(
220+
!(
221+
response.data.data.db_vector_dimension > 0 &&
222+
response.data.data.db_vector_dimension != response.data.data.application_dimension
223+
)
224+
)
225+
}
226+
isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0}
227+
chunksExists={true}
228+
/>
229+
),
230+
});
231+
} else {
232+
setMessage({
233+
type: 'danger',
234+
content: (
235+
<VectorIndexMisMatchAlert
236+
vectorIndexLoading={vectorIndexLoading}
237+
recreateVectorIndex={() => recreateVectorIndex(true)}
238+
isVectorIndexAlreadyExists={
239+
response.data.data.db_vector_dimension != 0 &&
240+
response.data.data.db_vector_dimension != response.data.data.application_dimension
241+
}
242+
chunksExists={true}
243+
userVectorIndexDimension={response.data.data.db_vector_dimension}
244+
/>
245+
),
246+
});
247+
}
248+
localStorage.setItem(
249+
'neo4j.connection',
250+
JSON.stringify({
251+
uri: connectionURI,
252+
user: username,
253+
password: password,
254+
database: database,
255+
userDbVectorIndex,
256+
})
257+
);
258+
}
259+
} catch (error) {
260+
setIsLoading(false);
261+
if (error instanceof Error) {
262+
setMessage({ type: 'danger', content: error.message });
263+
setOpenConnection((prev) => ({ ...prev, openPopUp: true }));
264+
setPassword('');
265+
setConnectionStatus(false);
214266
}
215-
localStorage.setItem(
216-
'neo4j.connection',
217-
JSON.stringify({
218-
uri: connectionURI,
219-
user: username,
220-
password: password,
221-
database: database,
222-
userDbVectorIndex,
223-
})
224-
);
225-
} else {
226-
setMessage({ type: 'danger', content: response.data.error });
227-
setOpenConnection((prev) => ({ ...prev, openPopUp: true }));
228-
setPassword('');
229-
setConnectionStatus(false);
230267
}
231-
setIsLoading(false);
232268
setTimeout(() => {
233269
setPassword('');
234270
}, 3000);

frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx

+6-3
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ export default function VectorIndexMisMatchAlert({
88
recreateVectorIndex,
99
isVectorIndexAlreadyExists,
1010
userVectorIndexDimension,
11+
chunksExists,
1112
}: {
1213
vectorIndexLoading: boolean;
1314
recreateVectorIndex: () => Promise<void>;
1415
isVectorIndexAlreadyExists: boolean;
15-
userVectorIndexDimension: number;
16+
userVectorIndexDimension?: number;
17+
chunksExists: boolean;
1618
}) {
1719
const { userCredentials } = useCredentials();
1820
return (
@@ -25,8 +27,9 @@ The existing Neo4j vector index dimension (${userVectorIndexDimension}) is incom
2527
To proceed, please choose one of the following options:
2628
1.**Recreate Vector Index:** Click "Re-Create Vector Index" to generate a compatible vector index.
2729
2.**Use a Different Instance:** Connect to a Neo4j instance with a compatible vector index configuration `
28-
: `**Vector index not found**.
29-
To leverage AI-powered search, please create a vector index.This will enable efficient similarity search within your Neo4j database`}
30+
: chunksExists
31+
? `A vector index is essential for performing efficient similarity searches within your data. Without it, some chunks of data will be invisible to queries based on meaning and context. Creating a vector index unlocks the full potential of your data by allowing you to find related information quickly and accurately.`
32+
: ''}
3033
</Markdown>
3134
</Box>
3235
<Box className='n-size-full n-flex n-flex-col n-items-center n-justify-center'>

frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx

+2-2
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,12 @@ export default function DeduplicationTab() {
8080
const onRemove = (nodeid: string, similarNodeId: string) => {
8181
setDuplicateNodes((prev) => {
8282
return prev.map((d) =>
83-
d.e.elementId === nodeid
83+
(d.e.elementId === nodeid
8484
? {
8585
...d,
8686
similar: d.similar.filter((n) => n.elementId != similarNodeId),
8787
}
88-
: d
88+
: d)
8989
);
9090
});
9191
};

frontend/src/services/vectorIndexCreation.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export const createVectorIndex = async (userCredentials: UserCredentials, isVect
88
formData.append('database', userCredentials?.database ?? '');
99
formData.append('userName', userCredentials?.userName ?? '');
1010
formData.append('password', userCredentials?.password ?? '');
11-
formData.append('is_vector_index_recreate', JSON.stringify(isVectorIndexExists));
11+
formData.append('isVectorIndexExist', JSON.stringify(isVectorIndexExists));
1212
try {
1313
const response = await axios.post<commonserverresponse>(`${url()}/drop_create_vector_index`, formData);
1414
return response;

frontend/src/types.ts

+5-3
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,10 @@ export interface ExtendedRelationship extends Relationship {
613613
labels: string[];
614614
}
615615
export interface connectionState {
616-
isvectorIndexMatch: boolean;
617616
openPopUp: boolean;
618-
novectorindexInDB: boolean;
617+
chunksExists: boolean;
618+
vectorIndexMisMatch: boolean;
619+
chunksExistsWithDifferentDimension: boolean;
619620
}
620621
export interface Message {
621622
type: 'success' | 'info' | 'warning' | 'danger' | 'unknown';
@@ -627,7 +628,8 @@ export interface ConnectionModalProps {
627628
setOpenConnection: Dispatch<SetStateAction<connectionState>>;
628629
setConnectionStatus: Dispatch<SetStateAction<boolean>>;
629630
isVectorIndexMatch: boolean;
630-
noVectorIndexFound: boolean;
631+
chunksExistsWithoutEmbedding: boolean;
632+
chunksExistsWithDifferentEmbedding: boolean;
631633
}
632634
export interface ReusableDropdownProps extends DropdownProps {
633635
options: string[] | OptionType[];

0 commit comments

Comments
 (0)