Skip to content

Commit a221cfc

Browse files
authored
feat: use LlamaParse for all the supported types (#154)
1 parent d3f92f8 commit a221cfc

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

.changeset/olive-knives-cheat.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"create-llama": patch
3+
---
4+
5+
Use LlamaParse for all the file types that it supports (if activated)

templates/components/loaders/python/file.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import logging
3+
from typing import Dict
34
from llama_parse import LlamaParse
45
from pydantic import BaseModel, validator
56

@@ -32,22 +33,33 @@ def llama_parse_parser():
3233
return parser
3334

3435

36+
def llama_parse_extractor() -> Dict[str, LlamaParse]:
    """Build a file-extractor mapping backed by a single LlamaParse parser.

    Returns:
        A dict with one key per entry of
        ``llama_parse.utils.SUPPORTED_FILE_TYPES``; every key maps to the
        same parser object returned by ``llama_parse_parser()``.
    """
    # Imported at call time, inside the function body.
    from llama_parse.utils import SUPPORTED_FILE_TYPES

    # One parser instance is created and shared by every file type.
    shared_parser = llama_parse_parser()
    return dict.fromkeys(SUPPORTED_FILE_TYPES, shared_parser)
41+
42+
3543
def get_file_documents(config: FileLoaderConfig):
3644
from llama_index.core.readers import SimpleDirectoryReader
3745

3846
try:
39-
reader = SimpleDirectoryReader(
40-
config.data_dir, recursive=True, filename_as_id=True, raise_on_error=True
41-
)
47+
file_extractor = None
4248
if config.use_llama_parse:
4349
# LlamaParse is async first,
4450
# so we need to use nest_asyncio to run it in sync mode
4551
import nest_asyncio
4652

4753
nest_asyncio.apply()
4854

49-
parser = llama_parse_parser()
50-
reader.file_extractor = {".pdf": parser}
55+
file_extractor = llama_parse_extractor()
56+
reader = SimpleDirectoryReader(
57+
config.data_dir,
58+
recursive=True,
59+
filename_as_id=True,
60+
raise_on_error=True,
61+
file_extractor=file_extractor,
62+
)
5163
return reader.load_data()
5264
except Exception as e:
5365
import sys, traceback

0 commit comments

Comments
 (0)