Penality committed on
Commit
7d978f8
·
verified ·
1 Parent(s): 004ce6c

Update app.py

Browse files

updated code to handle file storage

Files changed (1) hide show
  1. app.py +50 -13
app.py CHANGED
@@ -29,29 +29,66 @@ embedding_dim = 768 # Adjust according to model
29
  index = faiss.IndexFlatL2(embedding_dim)
30
  documents = [] # Store raw text for reference
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def store_document(text):
33
  print("storing document")
 
 
 
 
 
 
 
34
 
35
- embedding = embedding_model.encode([text])
36
- print(f"embedding: \n{embedding}")
37
- index.add(np.array(embedding, dtype=np.float32))
38
- documents.append(text)
 
 
 
 
 
 
 
39
 
40
- print(f"your document has been stored")
41
 
42
  return "Document stored!"
43
 
44
  def retrieve_document(query):
45
  print(f"retrieving doc based on: \n{query}")
46
 
47
- if len(documents) >= 1:
48
- query_embedding = embedding_model.encode([query])
49
- _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), 1)
50
 
51
- print(f"retrieved: \n{documents[closest_idx[0][0]]}")
52
-
53
- return documents[closest_idx[0][0]]
54
- return None
 
 
55
 
56
 
57
  def clean_text(text):
@@ -131,4 +168,4 @@ iface = gr.Interface(
131
  )
132
 
133
  # Launch Gradio app
134
- iface.launch()
 
29
  index = faiss.IndexFlatL2(embedding_dim)
30
  documents = [] # Store raw text for reference
31
 
32
+
33
+ # initialize the variables to store documents
34
+ DOCUMENT_DIR = "Documents"
35
+ INDEX_FILE = "faiss_index.py" # stores embeddings
36
+ METADATA_FILE = "metadata.json" # stores Document metadata
37
+
38
+ # create the directory
39
+ os.makedirs(DOCUMENT_DIR, exists_ok=True)
40
+
41
+ # load the faiss indexes file
42
+ if os.path.exists(INDEX_FILE): # check if index file exists
43
+ stored_embeddings = np.load(INDEX_FILE) # load emeddings
44
+ if stored_embeddings.shape[0] > 0:
45
+ index.add(stored_embeddings)
46
+
47
+ # load the document metadata
48
+ if os.path.exists(METADATA_FILE): # check if metadata exists
49
+ with open(METADATA_FILE, "r") as f:
50
+ metadata = json.load(f)
51
+ else:
52
+ metadata = {}
53
+
54
  def store_document(text):
55
  print("storing document")
56
+
57
+ # Generate a unique filename
58
+ filename = os.path.join(DOCS_DIR, f"doc_{len(metadata) + 1}.txt")
59
+
60
+ # Save document in a file
61
+ with open(filename, "w") as f:
62
+ f.write(text)
63
 
64
+ # Generate and store embedding
65
+ embedding = embedding_model.encode([text]).astype(np.float32)
66
+ index.add(embedding)
67
+
68
+ # Update metadata
69
+ metadata[len(metadata)] = filename
70
+ with open(METADATA_FILE, "w") as f:
71
+ json.dump(metadata, f)
72
+
73
+ # Save FAISS index
74
+ np.save(INDEX_FILE, index.reconstruct_n(0, index.ntotal))
75
 
76
+ print(f"your document has been stored at: {filename}")
77
 
78
  return "Document stored!"
79
 
80
  def retrieve_document(query):
81
  print(f"retrieving doc based on: \n{query}")
82
 
83
+ query_embedding = embedding_model.encode([query]).astype(np.float32)
84
+ _, closest_idx = index.search(query_embedding, 1)
 
85
 
86
+ if closest_idx[0][0] in metadata: # Ensure a valid match
87
+ filename = metadata[str(closest_idx[0][0])]
88
+ with open(filename, "r") as f:
89
+ return f.read()
90
+ else:
91
+ return None
92
 
93
 
94
  def clean_text(text):
 
168
  )
169
 
170
  # Launch Gradio app
171
+ iface.launch()