Spaces:
Runtime error
Runtime error
| import os | |
| import pickle | |
| import tempfile | |
| from langchain.document_loaders.csv_loader import CSVLoader | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
class Embedder:
    """Build FAISS vector stores from uploaded CSV data and cache them on disk.

    Each store is pickled to ``<PATH>/<filename>.pkl`` so that a later request
    for the same ``filename`` loads the cached store instead of re-embedding.
    """

    def __init__(self):
        # Directory (relative to the current working directory) that holds the
        # pickled vector stores.
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates a directory to store the embeddings vectors
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir() and
        # also copes with a nested PATH.
        os.makedirs(self.PATH, exist_ok=True)

    def _picklePath(self, filename):
        """Return the path of the pickle file that caches ``filename``."""
        return f"{self.PATH}/{filename}.pkl"

    def storeDocEmbeds(self, file, filename):
        """
        Stores document embeddings using Langchain and FAISS

        ``file`` is the raw content of the uploaded CSV; it is written in
        binary mode, so it must be a bytes-like object. ``filename`` is the
        key under which the resulting vector store is cached.
        """
        # CSVLoader takes a file path, so the uploaded content is spilled to a
        # temporary file first. delete=False keeps the file around after it is
        # closed so the loader can reopen it by name.
        tmp_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        try:
            with tmp_file:
                tmp_file.write(file)
            # Load the data from the file using Langchain
            loader = CSVLoader(file_path=tmp_file.name, encoding="utf-8")
            data = loader.load_and_split()
            # Create an embeddings object using Langchain
            # NOTE(review): presumably requires OpenAI credentials to be
            # configured in the environment -- confirm against deployment.
            embeddings = OpenAIEmbeddings()
            # Store the embeddings vectors using FAISS
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Remove the temporary file even when writing, loading or
            # embedding fails, so failed uploads do not pile up on disk.
            os.remove(tmp_file.name)

        # Save the vectors to a pickle file. Write to a sibling file first and
        # rename it into place: a failed dump must not leave a truncated
        # ``.pkl`` behind, because getDocEmbeds treats any existing file as a
        # valid cache entry.
        target = self._picklePath(filename)
        partial = f"{target}.tmp"
        try:
            with open(partial, "wb") as f:
                pickle.dump(vectors, f)
        except Exception:
            if os.path.exists(partial):
                os.remove(partial)
            raise
        os.replace(partial, target)

    def getDocEmbeds(self, file, filename):
        """
        Retrieves document embeddings

        Returns the object unpickled from the cache file for ``filename``.
        When no cache file exists yet, ``file`` is embedded and cached first
        via storeDocEmbeds.
        """
        target = self._picklePath(filename)
        # Check if embeddings vectors have already been stored in a pickle file
        if not os.path.isfile(target):
            # If not, store the vectors using the storeDocEmbeds function
            self.storeDocEmbeds(file, filename)
        # Load the vectors from the pickle file
        # NOTE(review): pickle.load executes arbitrary code from the file, so
        # the embeddings directory must only ever contain files written by
        # storeDocEmbeds; confirm that ``filename`` cannot point elsewhere.
        with open(target, "rb") as f:
            vectors = pickle.load(f)
        return vectors