tstone87 committed on
Commit
2642249
·
verified ·
1 Parent(s): d35d9e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -90
app.py CHANGED
@@ -1,110 +1,59 @@
1
- import os
2
- import requests
3
- import fitz # PyMuPDF for PDF reading
4
- import faiss
5
- import numpy as np
6
  import gradio as gr
7
  from sentence_transformers import SentenceTransformer
 
 
8
  from huggingface_hub import InferenceClient
9
 
10
- # 🔹 Define PDF Directory and Chunk Size
11
- PDF_DIR = "./pdfs"
12
- CHUNK_SIZE = 2500 # Larger chunks for better context
13
-
14
- # 🔹 Ensure Directory Exists
15
- os.makedirs(PDF_DIR, exist_ok=True)
16
-
17
- # 🔹 Direct URLs for PDF Downloads (Colorado Policy Documents)
18
- PDF_FILES = {
19
- "SNAP 10 CCR 2506-1.pdf": "https://huggingface.co/spaces/tstone87/ccr-colorado/resolve/main/SNAP%2010%20CCR%202506-1%20.pdf?download=true",
20
- "Med 10 CCR 2505-10 8.100.pdf": "https://huggingface.co/spaces/tstone87/ccr-colorado/resolve/main/Med%2010%20CCR%202505-10%208.100.pdf?download=true",
21
- }
22
 
23
- # 🔹 Function to Download PDFs
 
 
 
 
 
24
 
25
- def download_pdfs():
26
- for filename, url in PDF_FILES.items():
27
- pdf_path = os.path.join(PDF_DIR, filename)
28
- if not os.path.exists(pdf_path):
29
- print(f"📥 Downloading {filename}...")
30
- try:
31
- response = requests.get(url, stream=True)
32
- response.raise_for_status()
33
- with open(pdf_path, "wb") as f:
34
- for chunk in response.iter_content(chunk_size=8192):
35
- f.write(chunk)
36
- print(f"✅ Downloaded {filename}")
37
- except Exception as e:
38
- print(f"❌ Error downloading {filename}: {e}")
39
 
40
- # 🔹 Function to Extract Text from PDFs
 
41
 
42
- def extract_text_from_pdfs():
43
- all_text = ""
44
- for pdf_file in os.listdir(PDF_DIR):
45
- if pdf_file.endswith(".pdf"):
46
- pdf_path = os.path.join(PDF_DIR, pdf_file)
47
- doc = fitz.open(pdf_path)
48
- for page in doc:
49
- all_text += page.get_text("text") + "\n"
50
- return all_text
51
-
52
- # 🔹 Initialize FAISS Index
53
-
54
- def initialize_faiss():
55
- download_pdfs()
56
- text_data = extract_text_from_pdfs()
57
- if not text_data:
58
- raise ValueError("❌ No text extracted from PDFs!")
59
-
60
- chunks = [text_data[i:i+CHUNK_SIZE] for i in range(0, len(text_data), CHUNK_SIZE)]
61
- model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
62
- embeddings = np.array([model.encode(chunk) for chunk in chunks])
63
  index = faiss.IndexFlatL2(embeddings.shape[1])
64
  index.add(embeddings)
65
- print("✅ FAISS index initialized.")
66
- return index, chunks
67
-
68
- # 🔹 Initialize FAISS on Startup
69
- index, chunks = initialize_faiss()
70
 
71
- # 🔹 Function to Search FAISS
72
 
73
- def search_policy(query, top_k=3):
74
- query_embedding = SentenceTransformer("multi-qa-mpnet-base-dot-v1").encode(query).reshape(1, -1)
 
75
  distances, indices = index.search(query_embedding, top_k)
76
- return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
77
 
78
- # 🔹 Hugging Face LLM Client
79
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
80
 
81
- # 🔹 Function to Handle Chat Responses
82
-
83
- def respond(message, history):
84
- messages = [{"role": "system", "content": "You are a chatbot specializing in Colorado public assistance programs."}]
85
- for val in history:
86
- if val[0]:
87
- messages.append({"role": "user", "content": val[0]})
88
- if val[1]:
89
- messages.append({"role": "assistant", "content": val[1]})
90
-
91
- policy_context = search_policy(message)
92
- if policy_context:
93
- messages.append({"role": "assistant", "content": f"📄 **Colorado Policy Info:**\n\n{policy_context}"})
94
-
95
- messages.append({"role": "user", "content": message})
96
  response = ""
97
  for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
98
- token = message.choices[0].delta.content
99
- response += token
100
- yield response
101
-
102
- # 🔹 Gradio Chat Interface (Colorado-Themed)
103
- demo = gr.ChatInterface(
104
- respond,
105
- textbox=gr.Textbox(placeholder="Ask about Colorado public assistance programs...", interactive=True, show_label=False),
106
- submit_btn=gr.Button("Send"),
107
- chatbot=gr.Chatbot(),
108
- )
 
109
 
110
  demo.launch()
 
1
+ import json
 
 
 
 
2
  import gradio as gr
3
  from sentence_transformers import SentenceTransformer
4
+ import faiss
5
+ import numpy as np
6
  from huggingface_hub import InferenceClient
7
 
8
+ # 🔹 Load JSON Data with Colorado Food Stamp Information
9
+ DATA_FILE = "colorado_foodstamps.json"
 
 
 
 
 
 
 
 
 
 
10
 
11
+ def load_json_data():
12
+ try:
13
+ with open(DATA_FILE, "r", encoding="utf-8") as f:
14
+ return json.load(f)
15
+ except FileNotFoundError:
16
+ return {"error": "Data file not found! Please upload a valid JSON file."}
17
 
18
+ data = load_json_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ # 🔹 Initialize FAISS for Searching Relevant Answers
21
+ model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
22
 
23
+ def create_faiss_index(data):
24
+ texts = list(data.values())
25
+ embeddings = np.array([model.encode(text) for text in texts])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  index = faiss.IndexFlatL2(embeddings.shape[1])
27
  index.add(embeddings)
28
+ return index, texts
 
 
 
 
29
 
30
+ index, texts = create_faiss_index(data)
31
 
32
+ # 🔹 Function to Search FAISS for Relevant Answers
33
+ def search_faiss(query, top_k=1):
34
+ query_embedding = model.encode(query).reshape(1, -1)
35
  distances, indices = index.search(query_embedding, top_k)
36
+ return texts[indices[0][0]] if indices[0][0] < len(texts) else "No relevant information found."
37
 
38
+ # 🔹 Hugging Face API for Additional Responses
39
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
40
 
41
+ def get_huggingface_response(query):
42
+ messages = [{"role": "system", "content": "Provide accurate food stamp information for Colorado."},
43
+ {"role": "user", "content": query}]
 
 
 
 
 
 
 
 
 
 
 
 
44
  response = ""
45
  for message in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
46
+ response += message.choices[0].delta.content
47
+ return response
48
+
49
+ # 🔹 Main Chatbot Function
50
+ def chatbot_response(message, history):
51
+ relevant_info = search_faiss(message)
52
+ if "No relevant information found." not in relevant_info:
53
+ return relevant_info
54
+ return get_huggingface_response(message)
55
+
56
+ # 🔹 Gradio Chat Interface
57
+ demo = gr.ChatInterface(chatbot_response, textbox=gr.Textbox(placeholder="Ask about Colorado food stamps..."))
58
 
59
  demo.launch()