Skip to content

upload file to sandbox #355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7dcbf2f
tmp
leehuwuj Oct 8, 2024
1e2502a
update private file handler
leehuwuj Oct 11, 2024
e12bd29
enhance code
leehuwuj Oct 11, 2024
1cef23c
reduce complexity
leehuwuj Oct 11, 2024
5bd3591
fix mypy
leehuwuj Oct 11, 2024
c8a9472
fix mypy
leehuwuj Oct 11, 2024
5fd25f6
remove comment
leehuwuj Oct 11, 2024
a4d3d36
support upload file and enhance interpreter tool
leehuwuj Oct 14, 2024
6efadd4
fix blocking stream event
leehuwuj Oct 14, 2024
3e82be7
fix mypy
leehuwuj Oct 14, 2024
393a926
Merge remote-tracking branch 'origin/main' into feat/upload-file-sandbox
leehuwuj Oct 14, 2024
9602c6c
add changeset and fix mypy after merge
leehuwuj Oct 14, 2024
985cb26
fix mypy
leehuwuj Oct 14, 2024
9a4c0a3
enhance code
leehuwuj Oct 14, 2024
2efc727
typing
leehuwuj Oct 14, 2024
249edf5
wording
leehuwuj Oct 15, 2024
22cd958
exclude indexing private csv file if code executor tool is enabled
leehuwuj Oct 15, 2024
30e408b
remove file content and duplicated file id
leehuwuj Oct 15, 2024
94b338a
simpler file upload
leehuwuj Oct 15, 2024
6bb7a30
support for TS
leehuwuj Oct 15, 2024
bbf321f
support file upload for artifact in TS
leehuwuj Oct 15, 2024
852e6ec
enhance file path
leehuwuj Oct 15, 2024
5ae6b57
enhance code
leehuwuj Oct 15, 2024
c64e2ba
revise vercel streaming
leehuwuj Oct 15, 2024
36cdb1e
remove redundant id
leehuwuj Oct 15, 2024
e0921fe
add show file widget to the
leehuwuj Oct 15, 2024
a3c1c55
allow upload file with empty index store
leehuwuj Oct 15, 2024
bae12e6
Merge branch 'main' into feat/upload-file-sandbox
marcusschiesser Oct 15, 2024
7d9dee2
add data scientist use case
marcusschiesser Oct 15, 2024
3b91e7b
use GPT4o model for data scientist and code artifact
marcusschiesser Oct 15, 2024
954113e
update comments
leehuwuj Oct 15, 2024
624aea7
use previewcard to render documents
marcusschiesser Oct 15, 2024
788fab0
fix: UI overlap, key warning, wrong filename and url in markdown
thucpn Oct 16, 2024
0f56092
use div as tag wrapper for message
thucpn Oct 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/poor-knives-smoke.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Fix blocked event streaming
5 changes: 5 additions & 0 deletions .changeset/wet-tips-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add upload file to sandbox
2 changes: 1 addition & 1 deletion helpers/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ For better results, you can specify the region parameter to get results from a s
dependencies: [
{
name: "e2b_code_interpreter",
version: "0.0.10",
version: "^0.0.11b38",
},
],
supportedFrameworks: ["fastapi", "express", "nextjs"],
Expand Down
21 changes: 16 additions & 5 deletions templates/components/engines/python/agent/tools/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,29 @@ class CodeGeneratorTool:
def __init__(self):
pass

def artifact(self, query: str, old_code: Optional[str] = None) -> Dict:
"""Generate a code artifact based on the input.
def artifact(
self,
query: str,
sandbox_files: Optional[List[str]] = None,
old_code: Optional[str] = None,
) -> Dict:
"""Generate a code artifact based on the provided input.

Args:
query (str): The description of the application you want to build.
query (str): A description of the application you want to build.
sandbox_files (Optional[List[str]], optional): A list of sandbox file paths. Defaults to None. Include these files if the code requires them.
old_code (Optional[str], optional): The existing code to be modified. Defaults to None.

Returns:
Dict: A dictionary containing the generated artifact information.
Dict: A dictionary containing information about the generated artifact.
"""

if old_code:
user_message = f"{query}\n\nThe existing code is: \n```\n{old_code}\n```"
else:
user_message = query
if sandbox_files:
user_message += f"\n\nThe provided files are: \n{str(sandbox_files)}"

messages: List[ChatMessage] = [
ChatMessage(role="system", content=CODE_GENERATION_PROMPT),
Expand All @@ -90,7 +98,10 @@ def artifact(self, query: str, old_code: Optional[str] = None) -> Dict:
sllm = Settings.llm.as_structured_llm(output_cls=CodeArtifact) # type: ignore
response = sllm.chat(messages)
data: CodeArtifact = response.raw
return data.model_dump()
data_dict = data.model_dump()
if sandbox_files:
data_dict["files"] = sandbox_files
return data_dict
except Exception as e:
logger.error(f"Failed to generate artifact: {str(e)}")
raise e
Expand Down
166 changes: 117 additions & 49 deletions templates/components/engines/python/agent/tools/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import logging
import os
import uuid
from typing import Dict, List, Optional
from typing import List, Optional

from app.engine.utils.file_helper import FileMetadata, save_file
from e2b_code_interpreter import CodeInterpreter
from e2b_code_interpreter.models import Logs
from llama_index.core.tools import FunctionTool
from pydantic import BaseModel

logger = logging.getLogger(__name__)
logger = logging.getLogger("uvicorn")


class InterpreterExtraResult(BaseModel):
Expand All @@ -22,11 +23,14 @@ class InterpreterExtraResult(BaseModel):
class E2BToolOutput(BaseModel):
    """Structured result of one sandbox code execution, returned to the agent."""

    # True when the executed cell raised an error (or retries were exhausted).
    is_error: bool
    # Raw stdout/stderr/display-data captured from the sandbox cell.
    logs: Logs
    # Human-readable failure description; None on success.
    error_message: Optional[str] = None
    # Saved artifacts (images, PDFs, ...) produced by the cell, with file URLs.
    results: List[InterpreterExtraResult] = []
    # How many times execution has been retried so far (used to cap at 3).
    retry_count: int = 0


class E2BCodeInterpreter:
output_dir = "output/tools"
uploaded_files_dir = "output/uploaded"

def __init__(self, api_key: str = None):
if api_key is None:
Expand All @@ -42,40 +46,53 @@ def __init__(self, api_key: str = None):
)

self.filesever_url_prefix = filesever_url_prefix
self.interpreter = CodeInterpreter(api_key=api_key)
self.interpreter = None
self.api_key = api_key

def __del__(self):
self.interpreter.close()

def get_output_path(self, filename: str) -> str:
# if output directory doesn't exist, create it
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir, exist_ok=True)
return os.path.join(self.output_dir, filename)
"""
Kill the interpreter when the tool is no longer in use
"""
if self.interpreter is not None:
self.interpreter.kill()

def save_to_disk(self, base64_data: str, ext: str) -> Dict:
filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename
def _init_interpreter(self, sandbox_files: List[str] = []):
"""
Lazily initialize the interpreter.
"""
logger.info(f"Initializing interpreter with {len(sandbox_files)} files")
self.interpreter = CodeInterpreter(api_key=self.api_key)
if len(sandbox_files) > 0:
for file_path in sandbox_files:
# The file path is a local path, but sometimes AI passes a sandbox file path
# We need to support both
file_name = os.path.basename(file_path)
if file_path.startswith("/tmp"):
# The file path is a sandbox file path
sandbox_file_path = file_path
local_file_path = os.path.join(self.uploaded_files_dir, file_name)
else:
# The file path is a local file path
local_file_path = file_path
sandbox_file_path = f"tmp/{file_name}"

with open(local_file_path, "rb") as f:
content = f.read()
if self.interpreter and self.interpreter.files:
self.interpreter.files.write(sandbox_file_path, content)
logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")

def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
buffer = base64.b64decode(base64_data)
output_path = self.get_output_path(filename)

try:
with open(output_path, "wb") as file:
file.write(buffer)
except IOError as e:
logger.error(f"Failed to write to file {output_path}: {str(e)}")
raise e

logger.info(f"Saved file to {output_path}")
filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename
output_path = os.path.join(self.output_dir, filename)

return {
"outputPath": output_path,
"filename": filename,
}
file_metadata = save_file(buffer, file_path=output_path)

def get_file_url(self, filename: str) -> str:
return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}"
return file_metadata

def parse_result(self, result) -> List[InterpreterExtraResult]:
def _parse_result(self, result) -> List[InterpreterExtraResult]:
"""
The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
We save each result to disk and return saved file metadata (extension, filename, url)
Expand All @@ -92,16 +109,20 @@ def parse_result(self, result) -> List[InterpreterExtraResult]:
for ext, data in zip(formats, results):
match ext:
case "png" | "svg" | "jpeg" | "pdf":
result = self.save_to_disk(data, ext)
filename = result["filename"]
file_metadata = self._save_to_disk(data, ext)
output.append(
InterpreterExtraResult(
type=ext,
filename=filename,
url=self.get_file_url(filename),
filename=file_metadata.name,
url=file_metadata.url,
)
)
case _:
# Try serialize data to string
try:
data = str(data)
except Exception as e:
data = f"Error when serializing data: {e}"
output.append(
InterpreterExtraResult(
type=ext,
Expand All @@ -114,28 +135,75 @@ def parse_result(self, result) -> List[InterpreterExtraResult]:

return output

def interpret(self, code: str) -> E2BToolOutput:
def interpret(
self,
code: str,
sandbox_files: List[str] = [],
retry_count: int = 0,
) -> E2BToolOutput:
"""
Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error.
Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
You have a maximum of 3 retries to get the code to run successfully.

Parameters:
code (str): The python code to be executed in a single cell.
code (str): The Python code to be executed in a single cell.
sandbox_files (List[str]): List of local file paths to be used by the code. The tool will throw an error if a file is not found.
retry_count (int): Number of times the tool has been retried.
"""
logger.info(
f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
)
exec = self.interpreter.notebook.exec_cell(code)

if exec.error:
logger.error("Error when executing code", exec.error)
output = E2BToolOutput(is_error=True, logs=exec.logs, results=[])
else:
if len(exec.results) == 0:
output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
if retry_count > 2:
return E2BToolOutput(
is_error=True,
logs=Logs(
stdout="",
stderr="",
display_data="",
error="",
),
error_message="Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
retry_count=retry_count,
)

if self.interpreter is None:
self._init_interpreter(sandbox_files)

if self.interpreter and self.interpreter.notebook:
logger.info(
f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
)
exec = self.interpreter.notebook.exec_cell(code)

if exec.error:
error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
logger.error(error_message)
# There would be an error from previous execution, kill the interpreter and return with error message
try:
self.interpreter.kill() # type: ignore
except Exception:
pass
finally:
self.interpreter = None
output = E2BToolOutput(
is_error=True,
logs=exec.logs,
results=[],
error_message=error_message,
retry_count=retry_count + 1,
)
else:
results = self.parse_result(exec.results[0])
output = E2BToolOutput(is_error=False, logs=exec.logs, results=results)
return output
if len(exec.results) == 0:
output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
else:
results = self._parse_result(exec.results[0])
output = E2BToolOutput(
is_error=False,
logs=exec.logs,
results=results,
retry_count=retry_count + 1,
)
return output
else:
raise ValueError("Interpreter is not initialized.")


def get_tools(**kwargs):
Expand Down
Loading
Loading