Spaces:

tstone87
/

ccr-colorado

Running

App Files Files Community

tstone87 committed on Feb 2

Commit

cbe5279

verified ·

1 Parent(s): 4611fb0

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -7

app.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import os
 import fitz  # PyMuPDF for PDF reading
 import faiss
 import numpy as np
 import gradio as gr
 from sentence_transformers import SentenceTransformer
-from huggingface_hub import hf_hub_download
 # 🔹 Hugging Face Space Repository Details
 HF_REPO_ID = "tstone87/ccr-colorado"
-# 🔹 Load Embedding Model (Better for QA Retrieval)
 model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
 # 🔹 Define PDF Directory and Chunk Size
@@ -19,19 +20,28 @@ CHUNK_SIZE = 2500  # Larger chunks for better context
 # 🔹 Ensure Directory Exists
 os.makedirs(PDF_DIR, exist_ok=True)
-# 🔹 Function to Download PDFs from Hugging Face Space (Handles LFS Storage)
 def download_pdfs():
     pdf_files = [
         "SNAP 10 CCR 2506-1 .pdf",
         "Med 10 CCR 2505-10 8.100.pdf",
     ]
     for pdf_file in pdf_files:
         pdf_path = os.path.join(PDF_DIR, pdf_file)
         if not os.path.exists(pdf_path):  # Download if not already present
             print(f"📥 Downloading {pdf_file}...")
-            hf_hub_download(repo_id=HF_REPO_ID, filename=pdf_file, local_dir=PDF_DIR, force_download=True)
     print("✅ All PDFs downloaded.")
 # 🔹 Function to Extract Text from PDFs
@@ -79,7 +89,6 @@ def search_policy(query, top_k=3):
     return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
 # 🔹 Hugging Face LLM Client
-from huggingface_hub import InferenceClient
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # 🔹 Function to Handle Chat Responses

 import os
+import urllib.parse
 import fitz  # PyMuPDF for PDF reading
 import faiss
 import numpy as np
 import gradio as gr
 from sentence_transformers import SentenceTransformer
+from huggingface_hub import hf_hub_download, InferenceClient
 # 🔹 Hugging Face Space Repository Details
 HF_REPO_ID = "tstone87/ccr-colorado"
+# 🔹 Load Embedding Model (Optimized for QA Retrieval)
 model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
 # 🔹 Define PDF Directory and Chunk Size
 # 🔹 Ensure Directory Exists
 os.makedirs(PDF_DIR, exist_ok=True)
+# 🔹 Function to Download PDFs from Hugging Face Space (Handles Spaces)
 def download_pdfs():
     pdf_files = [
         "SNAP 10 CCR 2506-1 .pdf",
         "Med 10 CCR 2505-10 8.100.pdf",
     ]
     for pdf_file in pdf_files:
         pdf_path = os.path.join(PDF_DIR, pdf_file)
         if not os.path.exists(pdf_path):  # Download if not already present
             print(f"📥 Downloading {pdf_file}...")
+            # URL encode spaces correctly
+            encoded_filename = urllib.parse.quote(pdf_file)
+            try:
+                hf_hub_download(repo_id=HF_REPO_ID, filename=encoded_filename, local_dir=PDF_DIR, force_download=True)
+                print(f"✅ Successfully downloaded {pdf_file}")
+            except Exception as e:
+                print(f"❌ Error downloading {pdf_file}: {e}")
     print("✅ All PDFs downloaded.")
 # 🔹 Function to Extract Text from PDFs
     return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
 # 🔹 Hugging Face LLM Client
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 # 🔹 Function to Handle Chat Responses