|
20 | 20 | persist_directory='./pharma_db')
|
21 | 21 |
|
def format_docs(docs):
    """Join the page content of several documents into one string.

    Args:
        docs: An iterable of document objects, each exposing a
            ``page_content`` string attribute.

    Returns:
        str: The ``page_content`` of every document, in order, separated
        by a blank line (two newline characters). An empty iterable
        yields an empty string.
    """
    # A blank line between documents keeps their boundaries visible in
    # the combined text.
    return "\n\n".join(doc.page_content for doc in docs)
|
24 | 32 |
|
25 | 33 | def add_to_db(uploaded_files):
|
| 34 | + """Processes and adds uploaded PDF files to the database. |
| 35 | +
|
| 36 | + This function checks if any files have been uploaded. If files are uploaded, |
| 37 | + it saves each file to a temporary location, processes the content using a PDF loader, |
| 38 | + and splits the content into smaller chunks. Each chunk, along with its metadata, |
| 39 | + is then added to the database. Temporary files are removed after processing. |
| 40 | +
|
| 41 | + Args: |
| 42 | + uploaded_files (list): A list of uploaded file objects to be processed. |
| 43 | +
|
| 44 | + Returns: |
| 45 | + None""" |
26 | 46 | # Check if files are uploaded
|
27 | 47 | if not uploaded_files:
|
28 | 48 | st.error("No files uploaded!")
|
@@ -59,6 +79,18 @@ def add_to_db(uploaded_files):
|
59 | 79 | os.remove(temp_file_path)
|
60 | 80 |
|
61 | 81 | def run_rag_chain(query):
|
| 82 | + """Processes a query using a Retrieval-Augmented Generation (RAG) chain. |
| 83 | +
|
| 84 | + This function utilizes a RAG chain to answer a given query. It retrieves |
| 85 | + relevant context using similarity search and then generates a response |
| 86 | + based on this context using a chat model. The chat model is pre-configured |
| 87 | + with a prompt template specialized in pharmaceutical sciences. |
| 88 | +
|
| 89 | + Args: |
| 90 | + query (str): The user's question that needs to be answered. |
| 91 | +
|
| 92 | + Returns: |
| 93 | + str: A response generated by the chat model, based on the retrieved context.""" |
62 | 94 | # Create a Retriever Object and apply Similarity Search
|
63 | 95 | retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5})
|
64 | 96 |
|
@@ -98,6 +130,24 @@ def run_rag_chain(query):
|
98 | 130 | return response
|
99 | 131 |
|
100 | 132 | def main():
|
| 133 | + """Initialize and manage the PharmaQuery application interface. |
| 134 | +
|
| 135 | + This function sets up the Streamlit application interface for PharmaQuery, |
| 136 | + a Pharmaceutical Insight Retrieval System. Users can enter queries related |
| 137 | + to the pharmaceutical industry, upload research documents, and manage API |
| 138 | + keys for enhanced functionality. |
| 139 | +
|
| 140 | + The main features include: |
| 141 | + - Query input area for users to ask questions about the pharmaceutical industry. |
| 142 | + - Submission button to process the query and display the retrieved insights. |
| 143 | + - Sidebar for API key input and management. |
| 144 | + - File uploader for adding research documents to the database, enhancing query responses. |
| 145 | +
|
| 146 | + Args: |
| 147 | + None |
| 148 | +
|
| 149 | + Returns: |
| 150 | + None""" |
101 | 151 | st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:")
|
102 | 152 | st.header("Pharmaceutical Insight Retrieval System")
|
103 | 153 |
|
|
0 commit comments