Skip to content

Commit 99aedcd

Browse files
authored
fix: outdated code snippets for loaders module in doc (#1310)
1 parent a60a682 commit 99aedcd

File tree

10 files changed

+300
-276
lines changed

10 files changed

+300
-276
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -428,3 +428,6 @@ temp_files/
428428
#Benchmark
429429
benchmark/gaia/Dataset
430430
benchmark/gaia/results.jsonl
431+
432+
# Secret files for docker
433+
.container/.env

camel/loaders/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,16 @@
1313
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414

1515
from .apify_reader import Apify
16-
from .base_io import File
16+
from .base_io import File, create_file, create_file_from_raw_bytes
1717
from .chunkr_reader import ChunkrReader
1818
from .firecrawl_reader import Firecrawl
1919
from .jina_url_reader import JinaURLReader
2020
from .unstructured_io import UnstructuredIO
2121

2222
__all__ = [
2323
'File',
24+
'create_file',
25+
'create_file_from_raw_bytes',
2426
'UnstructuredIO',
2527
'JinaURLReader',
2628
'Firecrawl',

camel/loaders/base_io.py

+41-41
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,47 @@
2222
from camel.utils import dependencies_required
2323

2424

25+
def create_file(file: BytesIO, filename: str) -> "File":
26+
r"""Reads an uploaded file and returns a File object.
27+
28+
Args:
29+
file (BytesIO): A BytesIO object representing the contents of the
30+
file.
31+
filename (str): The name of the file.
32+
33+
Returns:
34+
File: A File object.
35+
"""
36+
ext_to_cls = {
37+
"docx": DocxFile,
38+
"pdf": PdfFile,
39+
"txt": TxtFile,
40+
"json": JsonFile,
41+
"html": HtmlFile,
42+
}
43+
44+
ext = filename.split(".")[-1].lower()
45+
if ext not in ext_to_cls:
46+
raise NotImplementedError(f"File type {ext} not supported")
47+
48+
out_file = ext_to_cls[ext].from_bytes(file, filename)
49+
return out_file
50+
51+
52+
def create_file_from_raw_bytes(raw_bytes: bytes, filename: str) -> "File":
53+
r"""Reads raw bytes and returns a File object.
54+
55+
Args:
56+
raw_bytes (bytes): The raw bytes content of the file.
57+
filename (str): The name of the file.
58+
59+
Returns:
60+
File: A File object.
61+
"""
62+
file = BytesIO(raw_bytes)
63+
return create_file(file, filename)
64+
65+
2566
class File(ABC):
2667
r"""Represents an uploaded file comprised of Documents.
2768
@@ -79,47 +120,6 @@ def from_raw_bytes(cls, raw_bytes: bytes, filename: str) -> "File":
79120
file = BytesIO(raw_bytes)
80121
return cls.from_bytes(file, filename)
81122

82-
@staticmethod
83-
def create_file(file: BytesIO, filename: str) -> "File":
84-
r"""Reads an uploaded file and returns a File object.
85-
86-
Args:
87-
file (BytesIO): A BytesIO object representing the contents of the
88-
file.
89-
filename (str): The name of the file.
90-
91-
Returns:
92-
File: A File object.
93-
"""
94-
ext_to_cls = {
95-
"docx": DocxFile,
96-
"pdf": PdfFile,
97-
"txt": TxtFile,
98-
"json": JsonFile,
99-
"html": HtmlFile,
100-
}
101-
102-
ext = filename.split(".")[-1].lower()
103-
if ext not in ext_to_cls:
104-
raise NotImplementedError(f"File type {ext} not supported")
105-
106-
out_file = ext_to_cls[ext].from_bytes(file, filename)
107-
return out_file
108-
109-
@staticmethod
110-
def create_file_from_raw_bytes(raw_bytes: bytes, filename: str) -> "File":
111-
r"""Reads raw bytes and returns a File object.
112-
113-
Args:
114-
raw_bytes (bytes): The raw bytes content of the file.
115-
filename (str): The name of the file.
116-
117-
Returns:
118-
File: A File object.
119-
"""
120-
file = BytesIO(raw_bytes)
121-
return File.create_file(file, filename)
122-
123123
def __repr__(self) -> str:
124124
return (
125125
f"File(name={self.name}, id={self.file_id}, "

camel/storages/object_storages/amazon_s3.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import Optional, Tuple
1818
from warnings import warn
1919

20-
from camel.loaders import File
20+
from camel.loaders import File, create_file_from_raw_bytes
2121
from camel.storages.object_storages.base import BaseObjectStorage
2222

2323

@@ -156,7 +156,7 @@ def _get_file(self, file_key: str, filename: str) -> File:
156156
Bucket=self._bucket_name, Key=file_key
157157
)
158158
raw_bytes = response["Body"].read()
159-
return File.create_file_from_raw_bytes(raw_bytes, filename)
159+
return create_file_from_raw_bytes(raw_bytes, filename)
160160

161161
def _upload_file(
162162
self, local_file_path: Path, remote_file_key: str

camel/storages/object_storages/azure_blob.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from typing import Optional, Tuple
1717
from warnings import warn
1818

19-
from camel.loaders import File
19+
from camel.loaders import File, create_file_from_raw_bytes
2020
from camel.storages.object_storages.base import BaseObjectStorage
2121

2222

@@ -123,7 +123,7 @@ def _get_file(self, file_key: str, filename: str) -> File:
123123
File: The object from the container.
124124
"""
125125
raw_bytes = self._client.download_blob(file_key).readall()
126-
file = File.create_file_from_raw_bytes(raw_bytes, filename)
126+
file = create_file_from_raw_bytes(raw_bytes, filename)
127127
return file
128128

129129
def _upload_file(

camel/storages/object_storages/google_cloud.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from typing import Tuple
1616
from warnings import warn
1717

18-
from camel.loaders import File
18+
from camel.loaders import File, create_file_from_raw_bytes
1919
from camel.storages.object_storages.base import BaseObjectStorage
2020

2121

@@ -111,7 +111,7 @@ def _get_file(self, file_key: str, filename: str) -> File:
111111
File: The object from the Google Cloud Storage bucket.
112112
"""
113113
raw_bytes = self._client.get_blob(file_key).download_as_bytes()
114-
return File.create_file_from_raw_bytes(raw_bytes, filename)
114+
return create_file_from_raw_bytes(raw_bytes, filename)
115115

116116
def _upload_file(
117117
self, local_file_path: Path, remote_file_key: str

docs/key_modules/loaders.md

+10-6
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,14 @@ This module is designed to read files of various formats, extract their contents
3333

3434
```python
3535
from io import BytesIO
36-
from camel.loaders import read_file
36+
from camel.loaders import create_file_from_raw_bytes
3737

3838
# Read a pdf file from disk
3939
with open("test.pdf", "rb") as file:
40-
file_content = BytesIO(file.read())
41-
file_content.name = "test.pdf"
40+
file_content = file.read()
4241

43-
# Use the read_file function to create an object based on the file extension
44-
file_obj = read_file(file_content)
42+
# Use the create_file_from_raw_bytes function to create an object based on the file extension
43+
file_obj = create_file_from_raw_bytes(file_content, "test.pdf")
4544

4645
# Once you have the File object, you can access its content
4746
print(file_obj.docs[0]["page_content"])
@@ -53,6 +52,9 @@ To get started with the `Unstructured IO` module, you first need to import the m
5352

5453
Utilize `parse_file_or_url` to load and parse unstructured data from a file or URL
5554
```python
55+
from camel.loaders import UnstructuredIO
56+
57+
uio = UnstructuredIO()
5658
# Set example url
5759
example_url = (
5860
"https://www.cnn.com/2023/01/30/sport/empire-state-building-green-"
@@ -93,7 +95,7 @@ print(cleaned_text)
9395
Utilize `extract_data_from_text` to do text extraction operation
9496
```python
9597
# Set example email to extract
96-
example_email_text = ("Contact me at example@email.com.")
98+
example_email_text = "Contact me at example@email.com."
9799

98100
extracted_text = uio.extract_data_from_text(text=example_email_text,
99101
extract_type="extract_email_address")
@@ -139,6 +141,8 @@ This is a basic guide to get you started with the `Unstructured IO` module. For
139141

140142
Initialize the client, set up the required actors and parameters.
141143
```python
144+
from camel.loaders import Apify
145+
142146
apify = Apify()
143147

144148
run_input = {

0 commit comments

Comments
 (0)