Back to AI PDF Chatbot (LangChain)

Ingest Demo

backend/ingest-demo.ipynb

latest · 2.6 KB
Original Source
typescript
import { SupabaseVectorStore } from "@langchain/community/vectorstores/supabase";
import { OpenAIEmbeddings } from "@langchain/openai";
import { createClient } from "@supabase/supabase-js";
import dotenv from "dotenv";

// Pull SUPABASE_URL / SUPABASE_SERVICE_ROLE_KEY (and the OpenAI key) from .env.
dotenv.config();

// Embedding model used to vectorize the document chunks.
const embeddings = new OpenAIEmbeddings({
  model: "text-embedding-3-small",
});

// Supabase client using the service-role key — server-side/notebook use only.
const supabaseClient = createClient(
  process.env.SUPABASE_URL ?? "",
  process.env.SUPABASE_SERVICE_ROLE_KEY ?? "",
);

// Vector store backed by the `documents` table and the `match_documents` RPC.
const vectorStore = new SupabaseVectorStore(embeddings, {
  client: supabaseClient,
  tableName: "documents",
  queryName: "match_documents",
});


typescript
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { writeFile } from "fs/promises";
import { reduceDocs } from "./src/shared/state.js";

// Source PDF for the ingestion demo (a Tesla 10-K filing, FY2023).
const pdfPath = "./test_docs/test-tsla-10k-2023.pdf"

// Load the PDF — PDFLoader yields one Document per page.
const docs = await new PDFLoader(pdfPath).load();

// Split pages into ~2000-character chunks with a 50-character overlap.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 2000,
  chunkOverlap: 50,
});
const docSplits = await splitter.splitDocuments(docs);

// NOTE(review): reduceDocs presumably normalizes the chunks and assigns ids
// (the next cell reads doc.id) — confirm against src/shared/state.js.
const finalDocs = reduceDocs([], docSplits);

// Inspect the first chunk.
finalDocs[0]


typescript
// Write the chunks to the vector store, reusing each chunk's own id as the
// row id. NOTE(review): with explicit ids, Supabase should upsert rather than
// duplicate rows on re-run — confirm against SupabaseVectorStore.addVectors.
const documentIds = finalDocs.map(({ id }) => id);
await vectorStore.addDocuments(finalDocs, { ids: documentIds });
  
typescript
// Count the number of documents in the vector store.
// FIX: the original `select('id', { count: 'exact' })` fetched every row just
// to read the count; `head: true` issues a HEAD request so only the count is
// returned, no row data.
await vectorStore.client
  .from('documents')
  .select('id', { count: 'exact', head: true });

typescript
// Retrieval sanity check: returns the top-k chunks most similar to the query
// (k defaults to 4 in LangChain JS) via the `match_documents` RPC.
await vectorStore.similaritySearch("risk factors");
typescript
import { pull } from "langchain/hub";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { z } from "zod";
import { ChatOpenAI } from "@langchain/openai";

// Retrieve the 2 most similar chunks per query.
const retriever = vectorStore.asRetriever({
  k: 2,
})

// Structured answer contract: the model must return the answer text plus the
// source passages it used.
const answerSchema = z.object({
  answer: z.string().describe("The answer to the question"),
  sources: z.array(z.string()).describe("The full text of the documents used to answer the question"),
});

const llm = new ChatOpenAI({
    model: "gpt-4o",
    temperature: 0,
  })

// FIX: the original declared answerSchema but never used it, so the model
// returned free-form text. Bind the schema so output is parsed/validated.
const structuredLlm = llm.withStructuredOutput(answerSchema);

// Standard RAG prompt from the LangChain Hub (expects `context` + `question`).
const promptTemplate = await pull<ChatPromptTemplate>("rlm/rag-prompt");

const query = "What is the name of the company in the report?"

const retrievedDocs = await retriever.invoke(query)

// FIX: pass the chunks' text, not the Document objects — stringifying whole
// Documents leaks metadata and object noise into the prompt context.
const formattedPrompt = await promptTemplate.invoke({
  context: retrievedDocs.map((doc) => doc.pageContent).join("\n\n"),
  question: query,
});

const response = await structuredLlm.invoke(formattedPrompt);

console.log(response);

typescript
// delete all documents from the vector store
// NOTE(review): supabase-js rejects an unfiltered delete, so `.neq('id', '')`
// is a match-everything filter. This assumes `documents.id` is text-compatible;
// if the column is uuid or bigint, comparing against '' may raise a Postgres
// type error — confirm the table schema before relying on this cleanup cell.
await vectorStore.client.from('documents').delete().neq('id', '');