Spaces:
Running
Running
Update app.py
Browse files
created INDEX_FILE if non-existent
app.py
CHANGED
@@ -40,10 +40,15 @@ os.makedirs(DOCUMENT_DIR, exist_ok=True)
|
|
40 |
|
41 |
# Load FAISS index if it exists
|
42 |
if os.path.exists(INDEX_FILE):
|
|
|
43 |
index = faiss.read_index(INDEX_FILE)
|
|
|
|
|
|
|
44 |
|
45 |
# Load metadata
|
46 |
if os.path.exists(METADATA_FILE):
|
|
|
47 |
with open(METADATA_FILE, "r") as f:
|
48 |
metadata = json.load(f)
|
49 |
else:
|
@@ -73,7 +78,9 @@ def store_document(text):
|
|
73 |
# Update metadata with FAISS index
|
74 |
metadata[str(doc_index)] = filename
|
75 |
with open(METADATA_FILE, "w") as f:
|
76 |
-
|
|
|
|
|
77 |
|
78 |
# Save FAISS index properly
|
79 |
faiss.write_index(index, INDEX_FILE)
|
@@ -88,7 +95,8 @@ def retrieve_document(query):
|
|
88 |
_, closest_idx = index.search(query_embedding, 1)
|
89 |
|
90 |
if not closest_idx or closest_idx[0][0] not in metadata:
|
91 |
-
|
|
|
92 |
|
93 |
|
94 |
if closest_idx[0][0] in metadata: # Ensure a valid match
|
@@ -139,7 +147,7 @@ def chatbot(pdf_file, user_question):
|
|
139 |
doc = retrieve_document(user_question)
|
140 |
|
141 |
if doc:
|
142 |
-
print("found doc")
|
143 |
# Split into smaller chunks
|
144 |
chunks = split_text(doc)
|
145 |
|
|
|
40 |
|
41 |
# Load FAISS index if it exists
|
42 |
if os.path.exists(INDEX_FILE):
|
43 |
+
print(" FAISS index file exists")
|
44 |
index = faiss.read_index(INDEX_FILE)
|
45 |
+
else:
|
46 |
+
print(" No FAISS index found. Creating a new one.")
|
47 |
+
index = faiss.IndexFlatL2(embedding_dim) # Empty FAISS index
|
48 |
|
49 |
# Load metadata
|
50 |
if os.path.exists(METADATA_FILE):
|
51 |
+
print("metadata exists")
|
52 |
with open(METADATA_FILE, "r") as f:
|
53 |
metadata = json.load(f)
|
54 |
else:
|
|
|
78 |
# Update metadata with FAISS index
|
79 |
metadata[str(doc_index)] = filename
|
80 |
with open(METADATA_FILE, "w") as f:
|
81 |
+
print(metadata)
|
82 |
+
json.dump(metadata, f)
|
83 |
+
print("saved Metadata")
|
84 |
|
85 |
# Save FAISS index properly
|
86 |
faiss.write_index(index, INDEX_FILE)
|
|
|
95 |
_, closest_idx = index.search(query_embedding, 1)
|
96 |
|
97 |
if not closest_idx or closest_idx[0][0] not in metadata:
|
98 |
+
print("No relevant Document found")
|
99 |
+
return None
|
100 |
|
101 |
|
102 |
if closest_idx[0][0] in metadata: # Ensure a valid match
|
|
|
147 |
doc = retrieve_document(user_question)
|
148 |
|
149 |
if doc:
|
150 |
+
print(f"found doc{doc}")
|
151 |
# Split into smaller chunks
|
152 |
chunks = split_text(doc)
|
153 |
|