From 3befca493e13756608e13a1f251b7f490040136b Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Fri, 5 Jul 2024 12:12:53 +0700
Subject: [PATCH 01/45] feat: get llamacloud file url in fast api

---
 .../fastapi/app/api/routers/models.py | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index aa9414e72..ba4c7411a 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -1,5 +1,6 @@
 import os
 import logging
+import requests
 from pydantic import BaseModel, Field, validator
 from pydantic.alias_generators import to_camel
 from typing import List, Any, Optional, Dict
@@ -112,6 +113,40 @@ def is_last_message_from_user(self) -> bool:
         return self.messages[-1].role == MessageRole.USER
 
 
+class LLamaCloudFileService(BaseModel):
+    @classmethod
+    def get_files(cls, pipeline_id: str) -> List[Dict[str, Any]]:
+        url = f"https://cloud.llamaindex.ai/api/v1/pipelines/{pipeline_id}/files"
+        payload = {}
+        headers = {
+            "Accept": "application/json",
+            "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
+        }
+        response = requests.request("GET", url, headers=headers, data=payload)
+        return response.json()
+
+    @classmethod
+    def get_file_detail(cls, project_id: str, file_id: str) -> str:
+        url = f"https://cloud.llamaindex.ai/api/v1/files/{file_id}/content?project_id={project_id}"
+        payload = {}
+        headers = {
+            "Accept": "application/json",
+            "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
+        }
+        response = requests.request("GET", url, headers=headers, data=payload)
+        return response.json()
+
+    @classmethod
+    def get_file_url(cls, name: str, pipeline_id: str) -> str | None:
+        files = cls.get_files(pipeline_id)
+        for file in files:
+            if file["name"] == name:
+                file_id = file["file_id"]
+                project_id = file["project_id"]
+                return cls.get_file_detail(project_id, file_id)["url"]
+        return None
+
+
 class SourceNodes(BaseModel):
     id: str
     metadata: Dict[str, Any]
@@ -124,6 +159,12 @@ def from_source_node(cls, source_node: NodeWithScore):
         metadata = source_node.node.metadata
         url = metadata.get("URL")
 
+        # if metadata has pipeline_id, get file url from LLamaCloudFileService
+        pipeline_id = metadata.get("pipeline_id")
+        if pipeline_id:
+            file_name = metadata.get("file_name")
+            url = LLamaCloudFileService.get_file_url(file_name, pipeline_id)
+
         if not url:
             file_name = metadata.get("file_name")
             url_prefix = os.getenv("FILESERVER_URL_PREFIX")

From 46823c07f3c4d076e97765a61d21907358ff953f Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Fri, 5 Jul 2024 16:05:56 +0700
Subject: [PATCH 02/45] refactor: move llamacloud to service

---
Review note: the class created in service.py below is not a verbatim copy of
the one removed from models.py. It adds a request timeout, an HTTP status
check, query-parameter encoding and a corrected return annotation on
get_file_detail. The blob id on the service.py "index" line was not
regenerated for the new content.

 .../fastapi/app/api/routers/models.py | 35 +----------
 .../types/streaming/fastapi/app/service.py | 67 +++++++++++++++++++
 2 files changed, 68 insertions(+), 34 deletions(-)
 create mode 100644 templates/types/streaming/fastapi/app/service.py

diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index ba4c7411a..430c44482 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -6,6 +6,7 @@
 from typing import List, Any, Optional, Dict
 from llama_index.core.schema import NodeWithScore
 from llama_index.core.llms import ChatMessage, MessageRole
+from app.service import LLamaCloudFileService
 
 logger = logging.getLogger("uvicorn")
 
@@ -113,40 +114,6 @@ def is_last_message_from_user(self) -> bool:
         return self.messages[-1].role == MessageRole.USER
 
 
-class LLamaCloudFileService(BaseModel):
-    @classmethod
-    def get_files(cls, pipeline_id: str) -> List[Dict[str, Any]]:
-        url = f"https://cloud.llamaindex.ai/api/v1/pipelines/{pipeline_id}/files"
-        payload = {}
-        headers = {
-            "Accept": "application/json",
-            "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
-        }
-        response = requests.request("GET", url, headers=headers, data=payload)
-        return response.json()
-
-    @classmethod
-    def get_file_detail(cls, project_id: str, file_id: str) -> str:
-        url = f"https://cloud.llamaindex.ai/api/v1/files/{file_id}/content?project_id={project_id}"
-        payload = {}
-        headers = {
-            "Accept": "application/json",
-            "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
-        }
-        response = requests.request("GET", url, headers=headers, data=payload)
-        return response.json()
-
-    @classmethod
-    def get_file_url(cls, name: str, pipeline_id: str) -> str | None:
-        files = cls.get_files(pipeline_id)
-        for file in files:
-            if file["name"] == name:
-                file_id = file["file_id"]
-                project_id = file["project_id"]
-                return cls.get_file_detail(project_id, file_id)["url"]
-        return None
-
-
 class SourceNodes(BaseModel):
     id: str
     metadata: Dict[str, Any]
diff --git a/templates/types/streaming/fastapi/app/service.py b/templates/types/streaming/fastapi/app/service.py
new file mode 100644
index 000000000..41842a867
--- /dev/null
+++ b/templates/types/streaming/fastapi/app/service.py
@@ -0,0 +1,67 @@
+import os
+import requests
+from typing import List, Any, Dict
+
+
+class LLamaCloudFileService:
+    """Minimal client for the LlamaCloud REST endpoints that expose file URLs.
+
+    Every request is authenticated with the LLAMA_CLOUD_API_KEY environment
+    variable.
+    """
+
+    BASE_URL = "https://cloud.llamaindex.ai/api/v1"
+    # requests applies no timeout by default; bound every call so a stalled
+    # connection cannot block the caller forever.
+    REQUEST_TIMEOUT = 30  # seconds
+
+    @classmethod
+    def _get_json(cls, path: str, params: Dict[str, str] | None = None) -> Any:
+        """GET ``BASE_URL + path`` and return the decoded JSON body.
+
+        Raises:
+            requests.HTTPError: the server answered with a 4xx/5xx status.
+            requests.RequestException: connection failure or timeout.
+        """
+        headers = {
+            "Accept": "application/json",
+            "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
+        }
+        response = requests.get(
+            f"{cls.BASE_URL}{path}",
+            headers=headers,
+            params=params,
+            timeout=cls.REQUEST_TIMEOUT,
+        )
+        # Fail loudly here: the public methods index the result as if it were
+        # the success schema, so an error payload must never reach them.
+        response.raise_for_status()
+        return response.json()
+
+    @classmethod
+    def get_files(cls, pipeline_id: str) -> List[Dict[str, Any]]:
+        """Return the decoded JSON list of files of pipeline ``pipeline_id``."""
+        return cls._get_json(f"/pipelines/{pipeline_id}/files")
+
+    @classmethod
+    def get_file_detail(cls, project_id: str, file_id: str) -> Dict[str, Any]:
+        """Return the content record of ``file_id`` within ``project_id``.
+
+        The result is the decoded JSON object (``get_file_url`` reads its
+        "url" key), not a string.
+        """
+        return cls._get_json(
+            f"/files/{file_id}/content", params={"project_id": project_id}
+        )
+
+    @classmethod
+    def get_file_url(cls, name: str, pipeline_id: str) -> str | None:
+        """Return the URL of the file called ``name`` in ``pipeline_id``.
+
+        Returns None when the pipeline lists no file with that name.
+        """
+        for file in cls.get_files(pipeline_id):
+            if file["name"] == name:
+                detail = cls.get_file_detail(file["project_id"], file["file_id"])
+                return detail["url"]
+        return None

From 0854c5b45370a73164a233e80376883e95c5c968 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Fri, 5 Jul 2024 16:16:47 +0700
Subject: [PATCH 03/45] fix: long url should be truncated

---
 .../app/components/ui/chat/chat-message/chat-sources.tsx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
index a43664364..1d4ccb614 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
@@ -90,7 +90,11 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
     // add a link to view its URL and a button to copy the URL to the clipboard
     return (