|
8 | 8 | from langchain_core.prompts import ChatPromptTemplate
|
9 | 9 | from src.shared.constants import GRAPH_CLEANUP_PROMPT
|
10 | 10 | from src.llm import get_llm
|
11 |
| -from src.main import get_labels_and_relationtypes |
| 11 | +from src.graphDB_dataAccess import graphDBdataAccess |
| 12 | +import time |
| 13 | + |
12 | 14 |
|
13 | 15 | DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;"
|
14 | 16 | LABELS_QUERY = "CALL db.labels()"
|
@@ -195,58 +197,35 @@ def update_embeddings(rows, graph):
|
195 | 197 | return graph.query(query,params={'rows':rows})
|
196 | 198 |
|
197 | 199 | def graph_schema_consolidation(graph):
|
198 |
| - nodes_and_relations = get_labels_and_relationtypes(graph) |
199 |
| - logging.info(f"nodes_and_relations in existing graph : {nodes_and_relations}") |
200 |
| - node_labels = [] |
201 |
| - relation_labels = [] |
202 |
| - |
203 |
| - node_labels.extend(nodes_and_relations[0]['labels']) |
204 |
| - relation_labels.extend(nodes_and_relations[0]['relationshipTypes']) |
205 |
| - |
206 |
| - exclude_node_labels = ['Document','Chunk','_Bloom_Perspective_', '__Community__', '__Entity__'] |
207 |
| - exclude_relationship_labels = ['PART_OF', 'NEXT_CHUNK', 'HAS_ENTITY', '_Bloom_Perspective_','FIRST_CHUNK','SIMILAR','IN_COMMUNITY','PARENT_COMMUNITY'] |
208 |
| - |
209 |
| - node_labels = [i for i in node_labels if i not in exclude_node_labels ] |
210 |
| - relation_labels = [i for i in relation_labels if i not in exclude_relationship_labels] |
211 |
| - |
| 200 | + graphDb_data_Access = graphDBdataAccess(graph) |
| 201 | + node_labels,relation_labels = graphDb_data_Access.get_nodelabels_relationships() |
212 | 202 | parser = JsonOutputParser()
|
213 |
| - prompt = ChatPromptTemplate(messages=[("system",GRAPH_CLEANUP_PROMPT),("human", "{input}")], |
214 |
| - partial_variables={"format_instructions": parser.get_format_instructions()}) |
215 |
| - |
216 |
| - graph_cleanup_model = os.getenv("GRAPH_CLEANUP_MODEL",'openai_gpt_4o') |
| 203 | + prompt = ChatPromptTemplate( |
| 204 | + messages=[("system", GRAPH_CLEANUP_PROMPT), ("human", "{input}")], |
| 205 | + partial_variables={"format_instructions": parser.get_format_instructions()} |
| 206 | + ) |
| 207 | + graph_cleanup_model = os.getenv("GRAPH_CLEANUP_MODEL", 'openai_gpt_4o') |
217 | 208 | llm, _ = get_llm(graph_cleanup_model)
|
218 | 209 | chain = prompt | llm | parser
|
219 |
| - nodes_dict = chain.invoke({'input':node_labels}) |
220 |
| - relation_dict = chain.invoke({'input':relation_labels}) |
221 |
| - |
222 |
| - node_match = {} |
223 |
| - relation_match = {} |
224 |
| - for new_label , values in nodes_dict.items() : |
225 |
| - for old_label in values: |
226 |
| - if new_label != old_label: |
227 |
| - node_match[old_label]=new_label |
228 |
| - |
229 |
| - for new_label , values in relation_dict.items() : |
230 |
| - for old_label in values: |
231 |
| - if new_label != old_label: |
232 |
| - relation_match[old_label]=new_label |
233 |
| - |
234 |
| - logging.info(f"updated node labels : {node_match}") |
235 |
| - logging.info(f"Reduced node counts from {len(node_labels)} to {len(node_match.items())}") |
236 |
| - logging.info(f"updated relationship labels : {relation_match}") |
237 |
| - logging.info(f"Reduced relationship counts from {len(relation_labels)} to {len(relation_match.items())}") |
238 | 210 |
|
239 |
| - # Update node labels in graph |
240 |
| - for old_label, new_label in node_match.items(): |
241 |
| - query = f""" |
242 |
| - MATCH (n:`{old_label}`) |
243 |
| - SET n:`{new_label}` |
244 |
| - REMOVE n:`{old_label}` |
245 |
| - """ |
246 |
| - graph.query(query) |
| 211 | + nodes_relations_input = {'nodes': node_labels, 'relationships': relation_labels} |
| 212 | + mappings = chain.invoke({'input': nodes_relations_input}) |
| 213 | + node_mapping = {old: new for new, old_list in mappings['nodes'].items() for old in old_list if new != old} |
| 214 | + relation_mapping = {old: new for new, old_list in mappings['relationships'].items() for old in old_list if new != old} |
| 215 | + |
| 216 | + logging.info(f"Node Labels: Total = {len(node_labels)}, Reduced to = {len(set(node_mapping.values()))} (from {len(node_mapping)})") |
| 217 | + logging.info(f"Relationship Types: Total = {len(relation_labels)}, Reduced to = {len(set(relation_mapping.values()))} (from {len(relation_mapping)})") |
| 218 | + |
| 219 | + if node_mapping: |
| 220 | + for old_label, new_label in node_mapping.items(): |
| 221 | + query = f""" |
| 222 | + MATCH (n:`{old_label}`) |
| 223 | + SET n:`{new_label}` |
| 224 | + REMOVE n:`{old_label}` |
| 225 | + """ |
| 226 | + graph.query(query) |
247 | 227 |
|
248 |
| - # Update relation types in graph |
249 |
| - for old_label, new_label in relation_match.items(): |
| 228 | + for old_label, new_label in relation_mapping.items(): |
250 | 229 | query = f"""
|
251 | 230 | MATCH (n)-[r:`{old_label}`]->(m)
|
252 | 231 | CREATE (n)-[r2:`{new_label}`]->(m)
|
|
0 commit comments