Spaces:

tstone87
/

ccr-colorado

Running

App Files Files Community

Thomas Stone committed on Feb 2

Commit

61c0cf3

verified ·

1 Parent(s): 02dde43

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -20

app.py CHANGED Viewed

@@ -1,11 +1,46 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -23,10 +58,13 @@ def respond(
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -35,31 +73,19 @@ def respond(
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
         response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a knowledgeable and professional chatbot designed to assist Colorado case workers in determining eligibility for public assistance programs. Your primary role is to provide accurate, up-to-date, and policy-compliant information on Medicaid, SNAP, TANF, CHP+, and other state and federal assistance programs. Responses should be clear, concise, and structured based on eligibility criteria, income limits, deductions, federal poverty level FPL guidelines, and program-specific requirements.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+import fitz  # PyMuPDF
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
+# Load embedding model
+# One shared SentenceTransformer instance: it encodes the PDF chunks when the
+# index is built and encodes each query inside search_pdf(), so both sides
+# are embedded by the same model.
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Function to extract text from PDFs
+def extract_text_from_pdf(pdf_path):
+    """Return the text of every page of a PDF as a single string.
+
+    Args:
+        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.
+
+    Returns:
+        The text of each page in document order, with a newline appended
+        after every page. An empty string if the document has no pages.
+    """
+    # Open the document as a context manager so its file handle is released
+    # even if text extraction raises part-way through.
+    with fitz.open(pdf_path) as doc:
+        # Join once instead of growing a string with += on every page.
+        return "".join(page.get_text() + "\n" for page in doc)
+# Load and process PDFs
+pdf_files = ["eligibility_guidelines.pdf", "public_assistance_rules.pdf"]  # Add PDF filenames
+all_text = "".join(extract_text_from_pdf(pdf) for pdf in pdf_files)
+# Split into fixed-size character chunks (the final chunk may be shorter)
+chunk_size = 500
+chunks = [all_text[i:i+chunk_size] for i in range(0, len(all_text), chunk_size)]
+# Fail early with a clear message: without any chunk there is no embedding
+# dimension to size the index with.
+if not chunks:
+    raise ValueError("No text could be extracted from the configured PDF files")
+# Generate embeddings in one batched call rather than one call per chunk;
+# FAISS expects a 2-D float32 array of shape (n_chunks, dim).
+embeddings = np.asarray(model.encode(chunks), dtype="float32")
+# Create FAISS index (exact L2 search); row i of the index corresponds to chunks[i]
+index = faiss.IndexFlatL2(embeddings.shape[1])
+index.add(embeddings)
+# Function to retrieve relevant text
+def search_pdf(query, top_k=3):
+    """Return the PDF chunks closest to ``query`` in embedding space.
+
+    Args:
+        query: Text to look up; it is embedded with the module-level ``model``.
+        top_k: Maximum number of chunks to return.
+
+    Returns:
+        The matching chunks, nearest first, joined by blank lines. An empty
+        string when ``top_k`` is not positive or there are no chunks.
+    """
+    # Never ask the index for more neighbours than it holds.
+    top_k = min(top_k, len(chunks))
+    if top_k <= 0:
+        return ""
+    query_embedding = model.encode(query).reshape(1, -1)
+    _, indices = index.search(query_embedding, top_k)
+    # FAISS fills missing results with label -1; skip them so that chunks[-1]
+    # (the last chunk) is never returned as a bogus match.
+    return "\n\n".join(chunks[i] for i in indices[0] if i >= 0)
+# Hugging Face LLM Client
+# respond() calls client.chat_completion() on this instance to generate replies
+# from the HuggingFaceH4/zephyr-7b-beta model.
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
+    # Search for relevant text in PDFs
+    pdf_context = search_pdf(message)
+    messages.append({"role": "system", "content": f"Relevant PDF Info:\n{pdf_context}"})
     messages.append({"role": "user", "content": message})
     response = ""
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         top_p=top_p,
     ):
         token = message.choices[0].delta.content
         response += token
         yield response
+# Gradio Chat Interface
+# Chat UI built around respond(), with four extra controls: a system-message
+# textbox and sliders for max new tokens, temperature and top-p.
+# NOTE(review): presumably these are handed to respond() in the order listed
+# here; respond()'s full signature is not visible in this diff, so confirm the
+# order matches.
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are a knowledgeable chatbot assisting Colorado case workers with Medicaid, SNAP, TANF, CHP+, and other programs.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
 if __name__ == "__main__":
     demo.launch()