Spaces:

tstone87
/

ccr-colorado

Running

App Files Files Community

tstone87 committed on Feb 2

Commit

f073b54

verified ·

1 Parent(s): cfec7bd

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -73

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from huggingface_hub import HfApi, hf_hub_download, login
 # 🔹 Hugging Face Repository Details
 HF_REPO_ID = "tstone87/repo"  # Your dataset repo
-HF_TOKEN = os.getenv("HF_TOKEN")  # Retrieve token securely
 if not HF_TOKEN:
     raise ValueError("❌ ERROR: Hugging Face token not found. Add it as a secret in the Hugging Face Space settings.")
@@ -37,79 +37,53 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
 chunk_size = 500
 chunks = [POLICY_TEXT[i:i+chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)] if POLICY_TEXT else []
-# 🔹 Function to Upload FAISS Files to Hugging Face Hub
-def upload_faiss_to_hf():
-    api = HfApi()
-    if os.path.exists(EMBEDDINGS_FILE):
-        print("📤 Uploading FAISS embeddings to Hugging Face...")
-        api.upload_file(
-            path_or_fileobj=EMBEDDINGS_FILE,
-            path_in_repo=EMBEDDINGS_FILE,
-            repo_id=HF_REPO_ID,
-            repo_type="dataset",
-            token=HF_TOKEN,
-        )
-    if os.path.exists(INDEX_FILE):
-        print("📤 Uploading FAISS index to Hugging Face...")
-        api.upload_file(
-            path_or_fileobj=INDEX_FILE,
-            path_in_repo=INDEX_FILE,
-            repo_id=HF_REPO_ID,
-            repo_type="dataset",
-            token=HF_TOKEN,
-        )
-    print("✅ FAISS files successfully uploaded to Hugging Face.")
-# 🔹 Function to Download FAISS Files from Hugging Face Hub if Missing
 def download_faiss_from_hf():
-    if not os.path.exists(EMBEDDINGS_FILE):
-        print("📥 Downloading FAISS embeddings from Hugging Face...")
-        hf_hub_download(repo_id=HF_REPO_ID, filename=EMBEDDINGS_FILE, local_dir=".", token=HF_TOKEN)
-    if not os.path.exists(INDEX_FILE):
-        print("📥 Downloading FAISS index from Hugging Face...")
-        hf_hub_download(repo_id=HF_REPO_ID, filename=INDEX_FILE, local_dir=".", token=HF_TOKEN)
-    print("✅ FAISS files downloaded from Hugging Face.")
 # 🔹 Check if FAISS Files Exist, Otherwise Download or Generate
 if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
     print("✅ FAISS files found locally. Loading from disk...")
     embeddings = np.load(EMBEDDINGS_FILE)
     index = faiss.read_index(INDEX_FILE)
 else:
-    print("🚀 FAISS files not found! Downloading from Hugging Face...")
-    download_faiss_from_hf()
-    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
-        embeddings = np.load(EMBEDDINGS_FILE)
-        index = faiss.read_index(INDEX_FILE)
     else:
-        print("🚀 No FAISS files found. Recomputing...")
-        if chunks:
-            embeddings = np.array([model.encode(chunk) for chunk in chunks])
-            # Save embeddings for future use
-            np.save(EMBEDDINGS_FILE, embeddings)
-            # Use FAISS optimized index for faster lookup
-            d = embeddings.shape[1]
-            nlist = 10  # Number of clusters
-            index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, nlist)
-            index.train(embeddings)
-            index.add(embeddings)
-            index.nprobe = 2  # Speed optimization
-            # Save FAISS index
-            faiss.write_index(index, INDEX_FILE)
-            upload_faiss_to_hf()  # Upload FAISS files to Hugging Face
-            print("✅ FAISS index created and saved.")
-        else:
-            print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
-            index = None
 # 🔹 Function to Search FAISS
 def search_policy(query, top_k=3):
@@ -121,21 +95,83 @@ def search_policy(query, top_k=3):
     return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
-# 🔹 Gradio UI to Download FAISS Files
-def prepare_faiss_files():
     if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
         shutil.copy(EMBEDDINGS_FILE, "/mnt/data/policy_embeddings.npy")
         shutil.copy(INDEX_FILE, "/mnt/data/faiss_index.bin")
-        return "✅ FAISS files are ready for download. Go to the 'Files' tab in Hugging Face Space and download them."
     else:
-        return "❌ FAISS files not found. Try running the chatbot first to generate them."
-with gr.Blocks() as download_ui:
-    gr.Markdown("### 🔽 Download FAISS Files")
     download_button = gr.Button("Prepare FAISS Files for Download")
     output_text = gr.Textbox()
-    download_button.click(fn=prepare_faiss_files, outputs=output_text)
-download_ui.launch()
-print("✅ FAISS index successfully loaded.")

 # 🔹 Hugging Face repository and credentials used for the FAISS file downloads
 HF_REPO_ID = "tstone87/repo"  # Repo id passed as repo_id to hf_hub_download
+HF_TOKEN = os.getenv("HF_TOKEN")  # Read from the HF_TOKEN environment variable; None when unset
 if not HF_TOKEN:  # Fail fast at startup when the token is missing or empty
     raise ValueError("❌ ERROR: Hugging Face token not found. Add it as a secret in the Hugging Face Space settings.")
 chunk_size = 500  # Slice length per chunk (characters, assuming POLICY_TEXT is a str — confirm)
 chunks = [POLICY_TEXT[i:i+chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)] if POLICY_TEXT else []  # Consecutive non-overlapping slices; empty list when POLICY_TEXT is falsy
+# 🔹 Function to Download FAISS Files from Hugging Face Hub if Available
 def download_faiss_from_hf():
+    """Fetch whichever FAISS artifacts are missing locally from the Hub repo.
+
+    Returns:
+        True when every missing file was downloaded without an exception,
+        False as soon as any step raises (the caller then rebuilds the index).
+    """
+    try:
+        # Pair each artifact with the progress line announced before its download.
+        pending = (
+            (EMBEDDINGS_FILE, "📥 Downloading FAISS embeddings from Hugging Face..."),
+            (INDEX_FILE, "📥 Downloading FAISS index from Hugging Face..."),
+        )
+        for artifact, announcement in pending:
+            if os.path.exists(artifact):
+                continue  # Already on disk; nothing to fetch.
+            print(announcement)
+            hf_hub_download(repo_id=HF_REPO_ID, filename=artifact, local_dir=".", token=HF_TOKEN)
+        print("✅ FAISS files downloaded from Hugging Face.")
+    except Exception as e:
+        print(f"⚠️ FAISS files not found in Hugging Face repo. Recomputing... ({e})")
+        return False
+    return True
 # 🔹 Resolve the FAISS artifacts in order: local files, then Hub download, then rebuild from chunks
 if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
     print("✅ FAISS files found locally. Loading from disk...")
     embeddings = np.load(EMBEDDINGS_FILE)
     index = faiss.read_index(INDEX_FILE)
+elif download_faiss_from_hf():  # True only when the Hub download finished without raising
+    embeddings = np.load(EMBEDDINGS_FILE)
+    index = faiss.read_index(INDEX_FILE)
 else:
+    print("🚀 No FAISS files found. Creating new index...")
+    if chunks:
+        embeddings = np.array([model.encode(chunk) for chunk in chunks])  # One encoded vector per chunk, stacked into one array
+        # Persist the embeddings so later startups can take the local-files branch
+        np.save(EMBEDDINGS_FILE, embeddings)
+        # Build an IVF index over a flat L2 quantizer
+        d = embeddings.shape[1]  # Embedding dimensionality (second axis of the stacked array)
+        nlist = 10  # Number of clusters; NOTE(review): IVF training presumably needs at least this many vectors — confirm for small corpora
+        index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, nlist)
+        index.train(embeddings)
+        index.add(embeddings)
+        index.nprobe = 2  # Speed optimization (presumably the number of clusters searched per query — confirm against FAISS docs)
+        # Persist the trained index next to the embeddings
+        faiss.write_index(index, INDEX_FILE)
+        print("✅ FAISS index created and saved.")
     else:
+        print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
+        index = None  # No index is built; `embeddings` is left unassigned on this path
 # 🔹 Function to Search FAISS
 def search_policy(query, top_k=3):
     return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
+# 🔹 Hugging Face inference client for the chat model; respond() streams completions through it
+from huggingface_hub import InferenceClient
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# 🔹 Function to Handle Chat Responses
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    """Stream a chat reply to ``message``, grounded in retrieved policy text.
+
+    Args:
+        message: The user's latest question.
+        history: Prior turns; each entry is indexed as ``[0]`` (user text) and
+            ``[1]`` (assistant text). Falsy entries are skipped.
+        system_message: System prompt placed first in the conversation.
+        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
+        temperature: Forwarded to ``client.chat_completion``.
+        top_p: Forwarded to ``client.chat_completion``.
+
+    Yields:
+        The response text accumulated so far, once per streamed chunk.
+    """
+    messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+    # 🔹 Retrieve relevant policy info from FAISS
+    policy_context = search_policy(message)
+    if policy_context:
+        # 🔹 Add the retrieved context to the conversation sent to the model
+        messages.append({"role": "assistant", "content": f"📄 **Relevant Policy Context:**\n\n{policy_context}"})
+        # 🔹 Force the LLM to use the retrieved policy text
+        user_query_with_context = f"""
+        The following is the most relevant policy information retrieved from the official Colorado public assistance policies:
+        {policy_context}
+        Based on this information, answer the following question:
+        {message}
+        """
+        messages.append({"role": "user", "content": user_query_with_context})
+    else:
+        # If no relevant policy info is found, use the original message
+        messages.append({"role": "user", "content": message})
+    response = ""
+    # The loop variable is deliberately not called ``message`` so the
+    # parameter of that name is never overwritten by a stream chunk.
+    for chunk in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        if not chunk.choices:
+            continue  # Nothing to read from a chunk without choices.
+        token = chunk.choices[0].delta.content
+        # A chunk may carry no text (content is None); appending None to a
+        # str would raise TypeError, so only concatenate real text.
+        if token:
+            response += token
+        yield response
+# 🔹 Gradio Chat Interface
+demo = gr.ChatInterface(
+    respond,  # Generator handler: yields the growing response text
+    additional_inputs=[  # Listed in the same order as respond's system_message, max_tokens, temperature, top_p parameters
+        gr.Textbox(
+            value="You are a knowledgeable and professional chatbot designed to assist Colorado case workers in determining eligibility for public assistance programs.",
+            label="System message"
+        ),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+# 🔹 Function to Provide FAISS Files for Download
+def download_faiss_files():
+    """Copy the FAISS embeddings and index into ``/mnt/data`` for download.
+
+    Returns:
+        A status string for the UI textbox: success, source files missing,
+        or the reason the copy failed.
+    """
+    if not (os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE)):
+        return "❌ FAISS files not found. Run the chatbot first to generate them."
+    try:
+        # shutil.copy fails when the target directory is absent, so create
+        # it first; exist_ok keeps repeated clicks harmless.
+        os.makedirs("/mnt/data", exist_ok=True)
+        shutil.copy(EMBEDDINGS_FILE, "/mnt/data/policy_embeddings.npy")
+        shutil.copy(INDEX_FILE, "/mnt/data/faiss_index.bin")
+    except OSError as e:
+        # Surface the failure in the textbox instead of raising inside the
+        # button callback.
+        return f"❌ Could not prepare FAISS files for download: {e}"
+    return "✅ FAISS files ready for download! Check the 'Files' tab in your Hugging Face Space."
+# Gradio button for downloading FAISS files
+with gr.Blocks() as file_download:
+    gr.Markdown("### 🔽 Download FAISS Files to Your Computer")
     download_button = gr.Button("Prepare FAISS Files for Download")
     output_text = gr.Textbox()
+    # The handler takes no inputs; its status string is shown in the textbox.
+    download_button.click(fn=download_faiss_files, outputs=output_text)
+if __name__ == "__main__":
+    # launch() blocks the main thread while its server runs, so launching
+    # the chat app and then the download app would never reach the second
+    # call. Serve both from a single app, one tab each.
+    app = gr.TabbedInterface([demo, file_download], ["Chat", "Download FAISS Files"])
+    app.launch()