Spaces:

Penality
/

pdf-something

Sleeping

Penality committed on Feb 23

Commit

adec520

verified ·

1 Parent(s): c8d88b2

Update app.py

Create an empty FAISS index if INDEX_FILE does not exist

Files changed (1) hide show

app.py CHANGED Viewed

@@ -40,10 +40,15 @@ os.makedirs(DOCUMENT_DIR, exist_ok=True)
 # Load FAISS index if it exists
 if os.path.exists(INDEX_FILE):
     index = faiss.read_index(INDEX_FILE)
 # Load metadata
 if os.path.exists(METADATA_FILE):
     with open(METADATA_FILE, "r") as f:
         metadata = json.load(f)
 else:
@@ -73,7 +78,9 @@ def store_document(text):
     # Update metadata with FAISS index
     metadata[str(doc_index)] = filename
     with open(METADATA_FILE, "w") as f:
-        json.dump(metadata, f)
     # Save FAISS index properly
     faiss.write_index(index, INDEX_FILE)
@@ -88,7 +95,8 @@ def retrieve_document(query):
     _, closest_idx = index.search(query_embedding, 1)
     if not closest_idx or closest_idx[0][0] not in metadata:
-        return "No relevant document found."
     if closest_idx[0][0] in metadata:  # Ensure a valid match
@@ -139,7 +147,7 @@ def chatbot(pdf_file, user_question):
     doc = retrieve_document(user_question)
     if doc:
-        print("found doc")
         # Split into smaller chunks
         chunks = split_text(doc)

 # Load FAISS index if it exists
 if os.path.exists(INDEX_FILE):
+    print(" FAISS index file exists")
     index = faiss.read_index(INDEX_FILE)
+else:
+    print(" No FAISS index found. Creating a new one.")
+    index = faiss.IndexFlatL2(embedding_dim)  # Empty FAISS index
 # Load metadata
 if os.path.exists(METADATA_FILE):
+    print("metadata exists")
     with open(METADATA_FILE, "r") as f:
         metadata = json.load(f)
 else:
     # Update metadata with FAISS index
     metadata[str(doc_index)] = filename
     with open(METADATA_FILE, "w") as f:
+        print(metadata)
+        json.dump(metadata, f)
+    print("saved Metadata")
     # Save FAISS index properly
     faiss.write_index(index, INDEX_FILE)
     _, closest_idx = index.search(query_embedding, 1)
     if not closest_idx or closest_idx[0][0] not in metadata:
+        print("No relevant Document found")
+        return None
     if closest_idx[0][0] in metadata:  # Ensure a valid match
     doc = retrieve_document(user_question)
     if doc:
+        print(f"found doc{doc}")
         # Split into smaller chunks
         chunks = split_text(doc)