5
5
import uuid
6
6
from io import BytesIO
7
7
from pathlib import Path
8
- from typing import List , Optional , Tuple
8
+ from typing import Dict , List , Optional , Tuple
9
9
10
10
from app .engine .index import IndexConfig , get_index
11
11
from app .engine .utils .file_helper import FileMetadata , save_file
15
15
_try_loading_included_file_formats as get_file_loaders_map ,
16
16
)
17
17
from llama_index .core .schema import Document
18
+ from llama_index .core .tools .function_tool import FunctionTool
18
19
from llama_index .indices .managed .llama_cloud .base import LlamaCloudIndex
19
20
from llama_index .readers .file import FlatReader
20
21
@@ -112,7 +113,7 @@ def _add_file_to_llama_cloud_index(
112
113
index : LlamaCloudIndex ,
113
114
file_name : str ,
114
115
file_data : bytes ,
115
- ) -> None :
116
+ ) -> str :
116
117
"""
117
118
Add the file to the LlamaCloud index.
118
119
LlamaCloudIndex is a managed index so we can directly use the files.
@@ -126,13 +127,13 @@ def _add_file_to_llama_cloud_index(
126
127
pipeline_id = index ._get_pipeline_id ()
127
128
# LlamaCloudIndex is a managed index so we can directly use the files
128
129
upload_file = (file_name , BytesIO (file_data ))
129
- _ = LLamaCloudFileService .add_file_to_pipeline (
130
+ doc_id = LLamaCloudFileService .add_file_to_pipeline (
130
131
project_id ,
131
132
pipeline_id ,
132
133
upload_file ,
133
134
custom_metadata = {},
134
135
)
135
- return None
136
+ return doc_id
136
137
137
138
@staticmethod
138
139
def _sanitize_file_name (file_name : str ) -> str :
@@ -161,14 +162,37 @@ def process_file(
161
162
file_data , extension = cls ._preprocess_base64_file (base64_content )
162
163
file_metadata = cls ._store_file (new_file_name , file_data )
163
164
164
- # Insert the file into the index
165
- if isinstance (index , LlamaCloudIndex ):
166
- _ = cls ._add_file_to_llama_cloud_index (index , new_file_name , file_data )
165
+ tools = cls ._get_available_tools ()
166
+ code_executor_tools = ["interpreter" , "artifact" ]
167
+ # If the file is CSV and there is a code executor tool, we don't need to index.
168
+ if extension == ".csv" and any (tool in tools for tool in code_executor_tools ):
169
+ return file_metadata
167
170
else :
168
- documents = cls ._load_file_to_documents (file_metadata )
169
- cls ._add_documents_to_vector_store_index (documents , index )
170
- # Add document ids to the file metadata
171
- file_metadata .refs = [doc .doc_id for doc in documents ]
171
+ # Insert the file into the index and update document ids to the file metadata
172
+ if isinstance (index , LlamaCloudIndex ):
173
+ doc_id = cls ._add_file_to_llama_cloud_index (
174
+ index , new_file_name , file_data
175
+ )
176
+ # Add document ids to the file metadata
177
+ file_metadata .refs = [doc_id ]
178
+ else :
179
+ documents = cls ._load_file_to_documents (file_metadata )
180
+ cls ._add_documents_to_vector_store_index (documents , index )
181
+ # Add document ids to the file metadata
182
+ file_metadata .refs = [doc .doc_id for doc in documents ]
172
183
173
184
# Return the file metadata
174
185
return file_metadata
186
+
187
@staticmethod
def _get_available_tools() -> Dict[str, List[FunctionTool]]:
    """
    Return the tools that are configured for the application.

    The result is whatever ``ToolFactory.from_env(map_result=True)`` returns;
    callers test tool names for membership in it (e.g. ``"interpreter" in tools``).

    Returns:
        The tool mapping, or an empty dict when the optional
        ``app.engine.tools`` module cannot be imported.

    Raises:
        ValueError: If the tools module is present but loading the tools fails.
    """
    # Only the optional import may fall back to "no tools". Keeping the
    # factory call out of this try-block prevents an ImportError raised while
    # loading a configured tool (e.g. a missing dependency) from being
    # silently reported as "no tools available".
    try:
        from app.engine.tools import ToolFactory
    except ImportError:
        # There is no tool code
        return {}

    try:
        return ToolFactory.from_env(map_result=True)
    except Exception as e:
        raise ValueError(f"Failed to get available tools: {e}") from e
0 commit comments