RustX committed on
Commit
14b3a47
·
1 Parent(s): 077463d

Create embedder.py

Browse files
Files changed (1) hide show
  1. modules/embedder.py +58 -0
modules/embedder.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import tempfile
4
+ from langchain.document_loaders.csv_loader import CSVLoader
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+
8
+
9
class Embedder:
    """Build FAISS vector stores from uploaded CSV data and cache them on disk.

    Each store is pickled under ``self.PATH`` as ``<filename>.pkl`` so that a
    later request for the same ``filename`` is served from the cache instead
    of being embedded again.
    """

    def __init__(self):
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates a directory to store the embeddings vectors
        """
        # exist_ok avoids the check-then-create race of
        # `if not exists: mkdir`, and makedirs also creates missing parents.
        os.makedirs(self.PATH, exist_ok=True)

    def _pickle_path(self, filename):
        """Return the path of the pickle file that caches ``filename``."""
        return f"{self.PATH}/{filename}.pkl"

    def storeDocEmbeds(self, file, filename):
        """
        Stores document embeddings using Langchain and FAISS

        Args:
            file: Raw bytes of the uploaded CSV (written to a temporary file
                opened in binary mode).
            filename: Name used to build the pickle file name under
                ``self.PATH``.
        """
        # CSVLoader takes a file path, so the uploaded bytes are spooled to a
        # temporary file first. delete=False keeps the file on disk after it
        # is closed so the loader can reopen it by name.
        tmp_file = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        tmp_file_path = tmp_file.name
        try:
            with tmp_file:
                tmp_file.write(file)

            # Load the data from the file using Langchain
            loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
            data = loader.load_and_split()

            # Create an embeddings object using Langchain
            embeddings = OpenAIEmbeddings()

            # Store the embeddings vectors using FAISS
            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # Remove the temporary file even when writing, loading or
            # embedding fails; otherwise every failed upload leaks a file.
            os.remove(tmp_file_path)

        # Save the vectors to a pickle file
        with open(self._pickle_path(filename), "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, filename):
        """
        Retrieves document embeddings

        Args:
            file: Raw bytes of the uploaded CSV; only used when no pickle
                file exists yet for ``filename``.
            filename: Name used to build the pickle file name under
                ``self.PATH``.

        Returns:
            The object unpickled from the cache file (the FAISS store written
            by ``storeDocEmbeds``).
        """
        pickle_path = self._pickle_path(filename)

        # Check if embeddings vectors have already been stored in a pickle file
        if not os.path.isfile(pickle_path):
            # If not, store the vectors using the storeDocEmbeds function
            self.storeDocEmbeds(file, filename)

        # Load the vectors from the pickle file
        # NOTE(security): pickle.load can execute arbitrary code. This is only
        # safe while the embeddings directory is writable by trusted code
        # alone and `filename` cannot point outside of it.
        with open(pickle_path, "rb") as f:
            vectors = pickle.load(f)

        return vectors