const response = await fetch(path);
const text = await response.text();
const splitter = new RecursiveCharacterTextSplitter({
chunkSize: 250,
chunkOverlap: 40
});
const output = await splitter.createDocuments([text]);
const textArr = output.map(chunk => chunk.pageContent);
return textArr;
}
// Top-level await: wait for splitDocument to finish processing 'handbook.txt'
// and keep its result for the embedding challenge described below.
const handbookChunks = await splitDocument('handbook.txt');
/*
Challenge:
Create a function createEmbeddings(chunks) that turns all the text
chunks into embeddings using the Mistral API and returns the data
in the format our vector database (Supabase) expects.
Here's how the data should be structured:
const embeddingsAndContent = [
{
content: "professional ethics and behavior are expected of all Ambrics ...",
embedding: [-0.07147216796875, -0.02557373046875, 0.01155853271484375 ... ]
},
{
content: "diplomacy and courtesy in their professional relationships ...",
embedding: [-0.006488800048828125, -0.022796630859375, -0.0653076171875 ... ]
},
...
]
*/
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import MistralClient from "@mistralai/mistralai";
// Shared Mistral client; the API key is read from the MISTRAL_API_KEY
// environment variable rather than being hard-coded in the source.
const client = new MistralClient(process.env.MISTRAL_API_KEY);
async function splitDocument(path) {