-
Notifications
You must be signed in to change notification settings - Fork 184
feat: support uploading pdf, docx, txt #140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
8db5405
0d140f4
5a06cf9
97af04e
312b6d3
d73ee43
d458783
035e96e
f452027
f707c6f
2d19560
04a4dc9
4ba4c64
fa991f4
9c8192d
7ff4843
3ca2f65
689ad9b
ee8cb00
28696d5
cca49c5
afb5405
4b66d29
d07ffe9
948b1b6
0a195f8
d22310d
aff87bb
38f231c
2629c88
c21e843
34ab445
321d77d
259c3ec
d6afe28
6298e4b
608a338
a9fa5cd
a00cb3d
425580d
3b1c743
7a0ce3f
d5f4395
cc15059
efdd43f
07e0821
709ef1f
43e8035
fdb32b7
445b4cc
a2787ae
d48bcb8
93fde20
884bc6d
2cc21ac
ce9ce5e
27332e6
c3f70a1
20a58c1
ab279c6
b638eae
6aa7d57
1f85358
498723a
be45b3f
5c8e79c
695923c
0e8786b
3404554
0ccb51e
55684b2
197cc90
e9ad3ed
503141f
30ebbe3
f670b1a
43149bf
8068ad5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import { Document, MetadataMode, Settings, SimpleNodeParser } from "llamaindex"; | ||
import pdf from "pdf-parse"; | ||
|
||
/**
 * Splits a text into chunks and computes one embedding per chunk.
 *
 * The text is wrapped in a single `Document` and split by a
 * `SimpleNodeParser` configured from the global `Settings` chunk size and
 * overlap. Each chunk is embedded using its `MetadataMode.EMBED` content,
 * while the returned `text` is the chunk's `MetadataMode.NONE` content.
 *
 * @param document - The full text to split and embed.
 * @returns One `{ text, embedding }` entry per chunk, in chunk order.
 */
export async function splitAndEmbed(document: string) {
  const splitter = new SimpleNodeParser({
    chunkSize: Settings.chunkSize,
    chunkOverlap: Settings.chunkOverlap,
  });
  const source = new Document({ text: document });
  const chunks = splitter.getNodesFromDocuments([source]);

  // Collect the embedding inputs first so they can be sent as one batch.
  const embedInputs: string[] = [];
  for (const chunk of chunks) {
    embedInputs.push(chunk.getContent(MetadataMode.EMBED));
  }
  const vectors = await Settings.embedModel.getTextEmbeddingsBatch(embedInputs);

  // Pair each chunk with the vector at the same position in the batch result.
  return chunks.map((chunk, position) => {
    const text = chunk.getContent(MetadataMode.NONE);
    const embedding = vectors[position];
    return { text, embedding };
  });
}
|
||
/**
 * Extracts the text of a base64-encoded PDF and embeds it chunk by chunk.
 *
 * @param rawPdf - Either a data URL (`data:application/pdf;base64,<payload>`)
 *   or the bare base64 payload without a prefix.
 * @returns `content`, the text extracted from the PDF, and `embeddings`, the
 *   result of `splitAndEmbed` on that text.
 * @throws Error when the input carries no base64 payload.
 */
export async function getPdfDetail(rawPdf: string) {
  // A data URL keeps its payload after the first comma. Base64 never contains
  // a comma, so input without one is treated as the bare payload instead of
  // indexing into a missing array element.
  const commaIndex = rawPdf.indexOf(",");
  const base64Payload = (
    commaIndex === -1 ? rawPdf : rawPdf.slice(commaIndex + 1)
  ).trim();
  if (!base64Payload) {
    throw new Error("PDF payload is empty");
  }

  const pdfBuffer = Buffer.from(base64Payload, "base64");
  const content = (await pdf(pdfBuffer)).text;
  const embeddings = await splitAndEmbed(content);
  return {
    content,
    embeddings,
  };
}
marcusschiesser marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import { NextRequest, NextResponse } from "next/server"; | ||
import { initSettings } from "../engine/settings"; | ||
import { getPdfDetail } from "./embeddings"; | ||
|
||
initSettings(); | ||
|
||
/**
 * Route handler that parses an uploaded PDF and returns its text and embeddings.
 *
 * Expects a JSON body of the form `{ "pdf": "<base64 or data URL>" }`.
 *
 * @param request - The incoming request; its body is read as JSON.
 * @returns 200 with the result of `getPdfDetail`, 400 when `pdf` is missing or
 *   not a non-empty string, and 500 with the error message for any other
 *   failure.
 */
export async function POST(request: NextRequest) {
  try {
    // The body is untrusted: read it as a loose shape and validate at runtime
    // rather than relying on a compile-time annotation.
    const body = (await request.json()) as { pdf?: unknown } | null;
    const pdf = body?.pdf;
    if (typeof pdf !== "string" || !pdf) {
      return NextResponse.json(
        { error: "pdf is required in the request body" },
        { status: 400 },
      );
    }
    const pdfDetail = await getPdfDetail(pdf);
    return NextResponse.json(pdfDetail);
  } catch (error: unknown) {
    console.error("[Embed API]", error);
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
marcusschiesser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
export const runtime = "nodejs"; | ||
export const dynamic = "force-dynamic"; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7,8 +7,10 @@ import FileUploader from "../file-uploader"; | |||||
import { Input } from "../input"; | ||||||
import UploadCsvPreview from "../upload-csv-preview"; | ||||||
import UploadImagePreview from "../upload-image-preview"; | ||||||
import UploadPdfPreview from "../upload-pdf-preview"; | ||||||
import { ChatHandler } from "./chat.interface"; | ||||||
import { useCsv } from "./hooks/use-csv"; | ||||||
import { usePdf } from "./hooks/use-pdf "; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Correct the import statement for usePdf. There is an extra space at the end of the import path for `usePdf`:
- import { usePdf } from "./hooks/use-pdf ";
+ import { usePdf } from "./hooks/use-pdf"; Committable suggestion
Suggested change
|
||||||
|
||||||
export default function ChatInput( | ||||||
props: Pick< | ||||||
|
@@ -26,9 +28,10 @@ export default function ChatInput( | |||||
) { | ||||||
const [imageUrl, setImageUrl] = useState<string | null>(null); | ||||||
const { files: csvFiles, upload, remove, reset } = useCsv(); | ||||||
const { pdf, setPdf, uploadAndEmbed } = usePdf(); | ||||||
|
||||||
const getAnnotations = () => { | ||||||
if (!imageUrl && csvFiles.length === 0) return undefined; | ||||||
if (!imageUrl && csvFiles.length === 0 && !pdf) return undefined; | ||||||
const annotations: MessageAnnotation[] = []; | ||||||
if (imageUrl) { | ||||||
annotations.push({ | ||||||
|
@@ -49,6 +52,22 @@ export default function ChatInput( | |||||
}, | ||||||
}); | ||||||
} | ||||||
if (pdf) { | ||||||
annotations.push({ | ||||||
type: MessageAnnotationType.PDF, | ||||||
data: { | ||||||
pdfFiles: [ | ||||||
{ | ||||||
id: pdf.id, | ||||||
content: pdf.content, | ||||||
filename: pdf.filename, | ||||||
filesize: pdf.filesize, | ||||||
embeddings: pdf.embeddings, | ||||||
}, | ||||||
], | ||||||
}, | ||||||
}); | ||||||
} | ||||||
return annotations as JSONValue[]; | ||||||
}; | ||||||
|
||||||
|
@@ -74,6 +93,7 @@ export default function ChatInput( | |||||
handleSubmitWithAnnotations(e, annotations); | ||||||
imageUrl && setImageUrl(null); | ||||||
csvFiles.length && reset(); | ||||||
pdf && setPdf(null); | ||||||
return; | ||||||
} | ||||||
props.handleSubmit(e); | ||||||
|
@@ -84,7 +104,7 @@ export default function ChatInput( | |||||
const readContent = async (file: File): Promise<string> => { | ||||||
const content = await new Promise<string>((resolve, reject) => { | ||||||
const reader = new FileReader(); | ||||||
if (file.type.startsWith("image/")) { | ||||||
if (file.type.startsWith("image/") || file.type === "application/pdf") { | ||||||
reader.readAsDataURL(file); | ||||||
} else { | ||||||
reader.readAsText(file); | ||||||
|
@@ -113,6 +133,16 @@ export default function ChatInput( | |||||
} | ||||||
}; | ||||||
|
||||||
const handleUploadPdfFile = async (file: File) => { | ||||||
marcusschiesser marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
const base64 = await readContent(file); | ||||||
await uploadAndEmbed({ | ||||||
id: uuidv4(), | ||||||
filename: file.name, | ||||||
filesize: file.size, | ||||||
pdfBase64: base64, | ||||||
}); | ||||||
}; | ||||||
|
||||||
const handleUploadFile = async (file: File) => { | ||||||
try { | ||||||
if (file.type.startsWith("image/")) { | ||||||
|
@@ -125,6 +155,13 @@ export default function ChatInput( | |||||
} | ||||||
return await handleUploadCsvFile(file); | ||||||
} | ||||||
if (file.type === "application/pdf") { | ||||||
if (pdf) { | ||||||
alert("You can only upload one pdf file at a time."); | ||||||
return; | ||||||
} | ||||||
return await handleUploadPdfFile(file); | ||||||
} | ||||||
props.onFileUpload?.(file); | ||||||
} catch (error: any) { | ||||||
props.onFileError?.(error.message); | ||||||
|
@@ -152,6 +189,7 @@ export default function ChatInput( | |||||
})} | ||||||
</div> | ||||||
)} | ||||||
{pdf && <UploadPdfPreview pdf={pdf} onRemove={() => setPdf(null)} />} | ||||||
<div className="flex w-full items-start justify-between gap-4 "> | ||||||
<Input | ||||||
autoFocus | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,27 +4,33 @@ import { useEffect, useMemo, useState } from "react"; | |
|
||
export interface ChatConfig { | ||
chatAPI?: string; | ||
embedAPI?: string; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add error handling for configuration fetching. While the integration of - .catch((error) => console.error("Error fetching config", error));
+ .catch((error) => {
+ console.error("Error fetching config", error);
+ setConfig({ ...config, error: "Failed to fetch configuration" });
+ }); Also applies to: 33-33 |
||
starterQuestions?: string[]; | ||
} | ||
|
||
export function useClientConfig() { | ||
const API_ROUTE = "/api/chat/config"; | ||
export function useClientConfig(): ChatConfig { | ||
const chatAPI = process.env.NEXT_PUBLIC_CHAT_API; | ||
const [config, setConfig] = useState<ChatConfig>({ | ||
chatAPI, | ||
}); | ||
|
||
const configAPI = useMemo(() => { | ||
const backendOrigin = chatAPI ? new URL(chatAPI).origin : ""; | ||
return `${backendOrigin}${API_ROUTE}`; | ||
const backendOrigin = useMemo(() => { | ||
return chatAPI ? new URL(chatAPI).origin : ""; | ||
}, [chatAPI]); | ||
|
||
const configAPI = `${backendOrigin}/api/chat/config`; | ||
const embedAPI = `${backendOrigin}/api/chat/embed`; | ||
|
||
useEffect(() => { | ||
fetch(configAPI) | ||
.then((response) => response.json()) | ||
.then((data) => setConfig({ ...data, chatAPI })) | ||
.catch((error) => console.error("Error fetching config", error)); | ||
}, [chatAPI, configAPI]); | ||
|
||
return config; | ||
return { | ||
chatAPI, | ||
embedAPI, | ||
starterQuestions: config.starterQuestions, | ||
marcusschiesser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
"use client"; | ||
|
||
import { useState } from "react"; | ||
import { PdfFile } from ".."; | ||
import { useClientConfig } from "./use-config"; | ||
|
||
export function usePdf() { | ||
marcusschiesser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const { embedAPI } = useClientConfig(); | ||
const [pdf, setPdf] = useState<PdfFile | null>(null); | ||
|
||
// Posts the base64-encoded PDF to the embed endpoint as JSON and resolves with
// the parsed JSON reply. Throws when the endpoint URL is missing or the
// response status is not OK.
const getPdfDetail = async (
  pdfBase64: string,
): Promise<Pick<PdfFile, "content" | "embeddings">> => {
  if (!embedAPI) {
    throw new Error("Embed API is not defined");
  }

  const payload = JSON.stringify({ pdf: pdfBase64 });
  const reply = await fetch(embedAPI, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });

  if (!reply.ok) {
    throw new Error("Failed to get pdf detail");
  }
  return reply.json();
};
|
||
// Requests content and embeddings for the given PDF, stores them in state
// together with the file's remaining fields, and resolves with the fetched
// detail. The `pdf` parameter shadows the `pdf` state value inside this
// function.
const uploadAndEmbed = async (pdf: {
  id: string;
  filename: string;
  filesize: number;
  pdfBase64: string;
}) => {
  // Everything except the raw base64 payload is kept on the stored file.
  const { pdfBase64: encoded, ...fileInfo } = pdf;
  const detail = await getPdfDetail(encoded);
  setPdf({ ...detail, ...fileInfo });
  return detail;
};
|
||
return { pdf, setPdf, uploadAndEmbed }; | ||
} |
Uh oh!
There was an error while loading. Please reload this page.