Skip to content

Commit 22cd958

Browse files
committed
Exclude private CSV files from indexing if a code executor tool is enabled
1 parent 249edf5 commit 22cd958

File tree

2 files changed

+37
-12
lines changed
  • templates
    • components/services/python
    • types/streaming/fastapi/app/api/routers

2 files changed

+37
-12
lines changed

templates/components/services/python/file.py

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import uuid
66
from io import BytesIO
77
from pathlib import Path
8-
from typing import List, Optional, Tuple
8+
from typing import Dict, List, Optional, Tuple
99

1010
from app.engine.index import IndexConfig, get_index
1111
from app.engine.utils.file_helper import FileMetadata, save_file
@@ -15,6 +15,7 @@
1515
_try_loading_included_file_formats as get_file_loaders_map,
1616
)
1717
from llama_index.core.schema import Document
18+
from llama_index.core.tools.function_tool import FunctionTool
1819
from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
1920
from llama_index.readers.file import FlatReader
2021

@@ -112,7 +113,7 @@ def _add_file_to_llama_cloud_index(
112113
index: LlamaCloudIndex,
113114
file_name: str,
114115
file_data: bytes,
115-
) -> None:
116+
) -> str:
116117
"""
117118
Add the file to the LlamaCloud index.
118119
LlamaCloudIndex is a managed index so we can directly use the files.
@@ -126,13 +127,13 @@ def _add_file_to_llama_cloud_index(
126127
pipeline_id = index._get_pipeline_id()
127128
# LlamaCloudIndex is a managed index so we can directly use the files
128129
upload_file = (file_name, BytesIO(file_data))
129-
_ = LLamaCloudFileService.add_file_to_pipeline(
130+
doc_id = LLamaCloudFileService.add_file_to_pipeline(
130131
project_id,
131132
pipeline_id,
132133
upload_file,
133134
custom_metadata={},
134135
)
135-
return None
136+
return doc_id
136137

137138
@staticmethod
138139
def _sanitize_file_name(file_name: str) -> str:
@@ -161,14 +162,37 @@ def process_file(
161162
file_data, extension = cls._preprocess_base64_file(base64_content)
162163
file_metadata = cls._store_file(new_file_name, file_data)
163164

164-
# Insert the file into the index
165-
if isinstance(index, LlamaCloudIndex):
166-
_ = cls._add_file_to_llama_cloud_index(index, new_file_name, file_data)
165+
tools = cls._get_available_tools()
166+
code_executor_tools = ["interpreter", "artifact"]
167+
# If the file is CSV and there is a code executor tool, we don't need to index.
168+
if extension == ".csv" and any(tool in tools for tool in code_executor_tools):
169+
return file_metadata
167170
else:
168-
documents = cls._load_file_to_documents(file_metadata)
169-
cls._add_documents_to_vector_store_index(documents, index)
170-
# Add document ids to the file metadata
171-
file_metadata.refs = [doc.doc_id for doc in documents]
171+
# Insert the file into the index and record its document ids in the file metadata
172+
if isinstance(index, LlamaCloudIndex):
173+
doc_id = cls._add_file_to_llama_cloud_index(
174+
index, new_file_name, file_data
175+
)
176+
# Add document ids to the file metadata
177+
file_metadata.refs = [doc_id]
178+
else:
179+
documents = cls._load_file_to_documents(file_metadata)
180+
cls._add_documents_to_vector_store_index(documents, index)
181+
# Add document ids to the file metadata
182+
file_metadata.refs = [doc.doc_id for doc in documents]
172183

173184
# Return the file metadata
174185
return file_metadata
186+
187+
@staticmethod
188+
def _get_available_tools() -> Dict[str, List[FunctionTool]]:
189+
try:
190+
from app.engine.tools import ToolFactory
191+
192+
tools = ToolFactory.from_env(map_result=True)
193+
return tools
194+
except ImportError:
195+
# The app.engine.tools module could not be imported, so no tools are available
196+
return {}
197+
except Exception as e:
198+
raise ValueError(f"Failed to get available tools: {e}") from e

templates/types/streaming/fastapi/app/api/routers/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,8 @@ def get_chat_document_ids(self) -> List[str]:
258258
uploaded_files = self.get_uploaded_files()
259259
for _file in uploaded_files:
260260
refs = _file.metadata.refs
261-
document_ids.extend(refs)
261+
if refs is not None:
262+
document_ids.extend(refs)
262263
return list(set(document_ids))
263264

264265
def get_uploaded_files(self) -> List[File]:

0 commit comments

Comments
 (0)