backend/ingest-demo.ipynb
import { SupabaseVectorStore } from "@langchain/community/vectorstores/supabase";
import { OpenAIEmbeddings } from "@langchain/openai";
import { createClient } from "@supabase/supabase-js";
import dotenv from "dotenv";
// Populate process.env from a local .env file before any client reads it.
dotenv.config();

// Embedding model handed to the vector store below.
const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });

// Supabase client built from environment configuration. A missing variable
// falls back to an empty string here rather than `undefined`.
const supabaseClient = createClient(
  process.env.SUPABASE_URL ?? '',
  process.env.SUPABASE_SERVICE_ROLE_KEY ?? '',
);

// Vector store backed by the `documents` table; similarity queries go through
// the `match_documents` query name configured here.
const vectorStore = new SupabaseVectorStore(embeddings, {
  client: supabaseClient,
  tableName: 'documents',
  queryName: 'match_documents',
});
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { writeFile } from "fs/promises";
import { reduceDocs } from "./src/shared/state.js";
// Path of the sample PDF that gets ingested by this notebook.
const pdfPath = "./test_docs/test-tsla-10k-2023.pdf";

// Step 1: load the PDF into LangChain documents.
const loader = new PDFLoader(pdfPath);
const docs = await loader.load();

// Step 2: break the loaded documents into chunks of up to 2000 characters,
// with a 50-character overlap between neighbouring chunks.
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 2000, chunkOverlap: 50 });
const docSplits = await textSplitter.splitDocuments(docs);

// Step 3: run the chunks through the project's `reduceDocs` reducer, starting
// from an empty existing list.
// NOTE(review): presumably this is what assigns each chunk its `id` — confirm
// in ./src/shared/state.js.
const finalDocs = reduceDocs([], docSplits);

// Peek at the first processed chunk (shown as the cell output in a notebook).
finalDocs[0]

// Step 4: write the chunks to the vector store, reusing each document's own id
// as the row id.
const documentIds = finalDocs.map(({ id }) => id);
await vectorStore.addDocuments(finalDocs, { ids: documentIds });
// Count the rows in the `documents` table. `head: true` requests the count
// only, so the response carries `count` without downloading every row's id.
await vectorStore.client
  .from('documents')
  .select('id', { count: 'exact', head: true });

// Sanity-check retrieval by running a sample similarity query against the store.
await vectorStore.similaritySearch("risk factors");
import { pull } from "langchain/hub";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { z } from "zod";
import { ChatOpenAI } from "@langchain/openai";
// Retriever over the vector store, configured with k = 2.
const retriever = vectorStore.asRetriever({ k: 2 });

// Shape of a structured answer: the answer text plus the source passages.
// NOTE(review): this schema is not attached to `llm` below, so the response is
// a plain chat message. If structured output is intended, something like
// `llm.withStructuredOutput(answerSchema)` would be needed — confirm intent.
const answerSchema = z.object({
  answer: z.string().describe("The answer to the question"),
  sources: z
    .array(z.string())
    .describe("The full text of the documents used to answer the question"),
});

// Chat model used to generate the answer; temperature is set to 0.
const llm = new ChatOpenAI({ model: "gpt-4o", temperature: 0 });

// Fetch the RAG prompt from the LangChain Hub.
const promptTemplate = await pull<ChatPromptTemplate>("rlm/rag-prompt");

const query = "What is the name of the company in the report?";
const retrievedDocs = await retriever.invoke(query);

// The prompt's `context` slot is a text placeholder. Passing the Document
// objects directly would rely on implicit stringification (object dumps rather
// than the chunk text), so the page contents are joined into one string.
const formattedPrompt = await promptTemplate.invoke({
  context: retrievedDocs.map((doc) => doc.pageContent).join("\n\n"),
  question: query,
});

const response = await llm.invoke(formattedPrompt);
console.log(response);
// Clean up: remove every row from the `documents` table. The `neq('id', '')`
// filter acts as a match-all condition, since it excludes only rows whose id
// equals the empty string.
// NOTE(review): assumes `id` is a text-compatible column; verify this filter
// against the table schema (e.g. a uuid column may reject '').
await vectorStore.client
  .from('documents')
  .delete()
  .neq('id', '');