Skip to content

Commit 8de117b

Browse files
2 parents 656f71b + 343972f commit 8de117b

File tree

6 files changed

+39
-32
lines changed

6 files changed

+39
-32
lines changed

README.md

-2
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,6 @@ Allow unauthenticated request : Yes
127127
## ENV
128128
| Env Variable Name | Mandatory/Optional | Default Value | Description |
129129
|-------------------------|--------------------|---------------|--------------------------------------------------------------------------------------------------|
130-
| OPENAI_API_KEY | Mandatory | | API key for OpenAI |
131-
| DIFFBOT_API_KEY | Mandatory | | API key for Diffbot |
132130
| EMBEDDING_MODEL | Optional | all-MiniLM-L6-v2 | Model for generating the text embedding (all-MiniLM-L6-v2 , openai , vertexai) |
133131
| IS_EMBEDDING | Optional | true | Flag to enable text embedding |
134132
| KNN_MIN_SCORE | Optional | 0.94 | Minimum score for KNN algorithm |

backend/example.env

+5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ ENTITY_EMBEDDING="" True or False
2828
DUPLICATE_SCORE_VALUE = ""
2929
DUPLICATE_TEXT_DISTANCE = ""
3030
#examples
31+
LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key"
32+
LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key"
33+
LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002"
34+
LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002"
35+
LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key"
3136
LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version"
3237
LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version"
3338
LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key"

backend/src/llm.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ def get_llm(model: str):
2424
env_key = "LLM_MODEL_CONFIG_" + model
2525
env_value = os.environ.get(env_key)
2626
logging.info("Model: {}".format(env_key))
27+
2728
if "gemini" in model:
29+
model_name = env_value
2830
credentials, project_id = google.auth.default()
29-
model_name = MODEL_VERSIONS[model]
31+
#model_name = MODEL_VERSIONS[model]
3032
llm = ChatVertexAI(
3133
model_name=model_name,
3234
#convert_system_message_to_human=True,
@@ -42,9 +44,10 @@ def get_llm(model: str):
4244
},
4345
)
4446
elif "openai" in model:
45-
model_name = MODEL_VERSIONS[model]
47+
#model_name = MODEL_VERSIONS[model]
48+
model_name, api_key = env_value.split(",")
4649
llm = ChatOpenAI(
47-
api_key=os.environ.get("OPENAI_API_KEY"),
50+
api_key=api_key,
4851
model=model_name,
4952
temperature=0,
5053
)
@@ -93,9 +96,10 @@ def get_llm(model: str):
9396
llm = ChatOllama(base_url=base_url, model=model_name)
9497

9598
elif "diffbot" in model:
96-
model_name = "diffbot"
99+
#model_name = "diffbot"
100+
model_name, api_key = env_value.split(",")
97101
llm = DiffbotGraphTransformer(
98-
diffbot_api_key=os.environ.get("DIFFBOT_API_KEY"),
102+
diffbot_api_key=api_key,
99103
extract_types=["entities", "facts"],
100104
)
101105

backend/src/main.py

+20-16
Original file line numberDiff line numberDiff line change
@@ -518,26 +518,30 @@ def get_chunkId_chunkDoc_list(graph, file_name, pages, retry_condition):
518518
else:
519519
chunkId_chunkDoc_list=[]
520520
chunks = graph.query(QUERY_TO_GET_CHUNKS, params={"filename":file_name})
521-
for chunk in chunks:
522-
chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']})
523-
chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc})
524521

525-
if retry_condition == START_FROM_LAST_PROCESSED_POSITION:
526-
logging.info(f"Retry : start_from_last_processed_position")
527-
starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name})
528-
if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list):
529-
return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
522+
if chunks[0]['text'] is None or chunks[0]['text']=="" :
523+
raise Exception(f"Chunks are not created for {file_name}. Please re-upload the file and try again.")
524+
else:
525+
for chunk in chunks:
526+
chunk_doc = Document(page_content=chunk['text'], metadata={'id':chunk['id'], 'position':chunk['position']})
527+
chunkId_chunkDoc_list.append({'chunk_id': chunk['id'], 'chunk_doc': chunk_doc})
530528

531-
elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list):
532-
starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name})
533-
return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
529+
if retry_condition == START_FROM_LAST_PROCESSED_POSITION:
530+
logging.info(f"Retry : start_from_last_processed_position")
531+
starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_POSITION, params={"filename":file_name})
532+
if starting_chunk[0]["position"] < len(chunkId_chunkDoc_list):
533+
return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
534+
535+
elif starting_chunk[0]["position"] == len(chunkId_chunkDoc_list):
536+
starting_chunk = graph.query(QUERY_TO_GET_LAST_PROCESSED_CHUNK_WITHOUT_ENTITY, params={"filename":file_name})
537+
return len(chunks), chunkId_chunkDoc_list[starting_chunk[0]["position"] - 1:]
538+
539+
else:
540+
raise Exception(f"All chunks of {file_name} are already processed. If you want to re-process, please start from the beginning.")
534541

535542
else:
536-
raise Exception(f"All chunks of {file_name} are already processed. If you want to re-process, please start from the beginning.")
537-
538-
else:
539-
logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}")
540-
return len(chunks), chunkId_chunkDoc_list
543+
logging.info(f"Retry : start_from_beginning with chunks {len(chunkId_chunkDoc_list)}")
544+
return len(chunks), chunkId_chunkDoc_list
541545

542546
def get_source_list_from_graph(uri,userName,password,db_name=None):
543547
"""

example.env

-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
# Mandatory
2-
OPENAI_API_KEY=""
3-
DIFFBOT_API_KEY=""
4-
51
# Optional Backend
62
EMBEDDING_MODEL="all-MiniLM-L6-v2"
73
IS_EMBEDDING="true"

frontend/src/utils/Constants.ts

+5-5
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ export const llms =
4141
? (process.env.VITE_LLM_MODELS?.split(',') as string[])
4242
: [
4343
'diffbot',
44-
'openai-gpt-3.5',
45-
'openai-gpt-4o',
46-
'openai-gpt-4o-mini',
47-
'gemini-1.5-pro',
48-
'gemini-1.5-flash',
44+
'openai_gpt_3.5',
45+
'openai_gpt_4o',
46+
'openai_gpt_4o_mini',
47+
'gemini_1.5_pro',
48+
'gemini_1.5_flash',
4949
'azure_ai_gpt_35',
5050
'azure_ai_gpt_4o',
5151
'ollama_llama3',

0 commit comments

Comments
 (0)