tstone87 committed on
Commit
d5b8fa3
·
verified ·
1 Parent(s): 9e4d73f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -181
app.py CHANGED
@@ -1,186 +1,22 @@
1
  import os
2
- import faiss
3
- import numpy as np
4
- from sentence_transformers import SentenceTransformer
5
- from huggingface_hub import HfApi, hf_hub_download, login, whoami
6
-
7
- # πŸ”Ή Hugging Face Repository Details
8
- HF_REPO_ID = "tstone87/repo" # Your repo
9
- HF_TOKEN = os.getenv("HF_TOKEN") # Retrieve token securely from environment variable
10
-
11
- if not HF_TOKEN:
12
- raise ValueError("❌ ERROR: Hugging Face token not found. Add it as a secret in the Hugging Face Space settings.")
13
-
14
- # πŸ”Ή Authenticate with Hugging Face
15
- login(token=HF_TOKEN)
16
-
17
- # πŸ”Ή File Paths
18
- EMBEDDINGS_FILE = "policy_embeddings.npy"
19
- INDEX_FILE = "faiss_index.bin"
20
- TEXT_FILE = "combined_text_documents.txt"
21
-
22
- # πŸ”Ή Load policy text from file
23
- if os.path.exists(TEXT_FILE):
24
- with open(TEXT_FILE, "r", encoding="utf-8") as f:
25
- POLICY_TEXT = f.read()
26
- print("βœ… Loaded policy text from combined_text_documents.txt")
27
- else:
28
- print("❌ ERROR: combined_text_documents.txt not found! Ensure it's uploaded.")
29
- POLICY_TEXT = ""
30
-
31
- # πŸ”Ή Sentence Embedding Model (Optimized for Speed)
32
- model = SentenceTransformer("all-MiniLM-L6-v2")
33
-
34
- # πŸ”Ή Split policy text into chunks for FAISS indexing
35
- chunk_size = 500
36
- chunks = [POLICY_TEXT[i:i+chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)] if POLICY_TEXT else []
37
-
38
# πŸ”Ή Upload locally generated FAISS artifacts to the Hugging Face dataset repo.
def upload_faiss_to_hf():
    """Push the embeddings file and FAISS index (whichever exist) to HF_REPO_ID."""
    api = HfApi()
    uploads = (
        (EMBEDDINGS_FILE, "πŸ“€ Uploading FAISS embeddings to Hugging Face..."),
        (INDEX_FILE, "πŸ“€ Uploading FAISS index to Hugging Face..."),
    )
    for path, announcement in uploads:
        if not os.path.exists(path):
            continue
        print(announcement)
        api.upload_file(
            path_or_fileobj=path,
            path_in_repo=path,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    print("βœ… FAISS files successfully uploaded to Hugging Face.")
63
-
64
# πŸ”Ή Fetch FAISS artifacts from the Hugging Face dataset repo when missing locally.
def download_faiss_from_hf():
    """Download any missing FAISS artifact into the working directory."""
    wanted = (
        (EMBEDDINGS_FILE, "πŸ“₯ Downloading FAISS embeddings from Hugging Face..."),
        (INDEX_FILE, "πŸ“₯ Downloading FAISS index from Hugging Face..."),
    )
    for filename, announcement in wanted:
        if os.path.exists(filename):
            continue
        print(announcement)
        hf_hub_download(repo_id=HF_REPO_ID, filename=filename, local_dir=".", token=HF_TOKEN)
    print("βœ… FAISS files downloaded from Hugging Face.")
75
-
76
# πŸ”Ή Obtain a FAISS index: load from disk, else download, else rebuild from text.
# Leaves `embeddings` (np.ndarray or None) and `index` (faiss index or None) defined.
if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
    print("βœ… FAISS files found locally. Loading from disk...")
    embeddings = np.load(EMBEDDINGS_FILE)
    index = faiss.read_index(INDEX_FILE)
else:
    print("πŸš€ FAISS files not found! Downloading from Hugging Face...")
    download_faiss_from_hf()

    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
        embeddings = np.load(EMBEDDINGS_FILE)
        index = faiss.read_index(INDEX_FILE)
    else:
        print("πŸš€ No FAISS files found. Recomputing...")
        if chunks:
            embeddings = np.array([model.encode(chunk) for chunk in chunks])

            # Save embeddings so the next start skips the encode pass.
            np.save(EMBEDDINGS_FILE, embeddings)

            # IVF index for faster lookup than brute force.
            d = embeddings.shape[1]
            nlist = 10  # number of clusters
            # Fix: keep a Python reference to the quantizer. Passing a
            # temporary IndexFlatL2 inline lets it be garbage-collected
            # while the IVF index still points at it, which can crash FAISS.
            quantizer = faiss.IndexFlatL2(d)
            index = faiss.IndexIVFFlat(quantizer, d, nlist)
            index.train(embeddings)
            index.add(embeddings)
            index.nprobe = 2  # probe few clusters: speed over recall

            # Persist and publish the index.
            faiss.write_index(index, INDEX_FILE)
            upload_faiss_to_hf()
            print("βœ… FAISS index created and saved.")
        else:
            print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
            # Fix: also define `embeddings` here — the original left it
            # unbound in this branch, so any later reference raised NameError.
            embeddings = None
            index = None
111
-
112
# πŸ”Ή Retrieve policy chunks relevant to a query via FAISS.
def search_policy(query, top_k=3):
    """Return up to *top_k* policy text chunks most similar to *query*.

    Args:
        query: Free-text user question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The matching chunks joined by blank lines, or an error string when
        the module-level FAISS index was never built.
    """
    if index is None:
        return "Error: FAISS index is not available."

    query_embedding = model.encode(query).reshape(1, -1)
    distances, indices = index.search(query_embedding, top_k)

    # Fix: FAISS pads missing neighbours with -1; the original `i < len(chunks)`
    # let -1 through, which silently returned chunks[-1] (an unrelated chunk).
    return "\n\n".join(chunks[i] for i in indices[0] if 0 <= i < len(chunks))
121
-
122
# πŸ”Ή Hugging Face LLM Client
from huggingface_hub import InferenceClient
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# πŸ”Ή Streamed chat handler: grounds the model in retrieved policy text.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Yield a progressively growing assistant reply for the Gradio chat.

    Args:
        message: Latest user question.
        history: Prior (user, assistant) turn pairs from Gradio.
        system_message: System prompt for the model.
        max_tokens, temperature, top_p: Generation controls.

    Yields:
        The partial response text after each streamed token.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    # πŸ”Ή Retrieve relevant policy info from FAISS
    policy_context = search_policy(message)

    if policy_context:
        # Surface the retrieved context in the chat transcript.
        messages.append({"role": "assistant", "content": f"πŸ“„ **Relevant Policy Context:**\n\n{policy_context}"})

        # Force the LLM to answer from the retrieved policy text.
        user_query_with_context = f"""
The following is the most relevant policy information retrieved from the official Colorado public assistance policies:

{policy_context}

Based on this information, answer the following question:
{message}
"""
        messages.append({"role": "user", "content": user_query_with_context})
    else:
        # No relevant policy info found — pass the question through unchanged.
        messages.append({"role": "user", "content": message})

    response = ""
    # Fix: the original loop variable was named `message`, shadowing the user
    # message parameter — renamed to `chunk` for clarity.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Fix: the final stream chunk can carry delta.content=None; the
        # original `response += token` then raised TypeError mid-stream.
        if token:
            response += token
        yield response
168
-
169
# πŸ”Ή Gradio chat interface: system prompt + generation controls as extra inputs.
import gradio as gr

_extra_inputs = [
    gr.Textbox(
        value="You are a knowledgeable and professional chatbot designed to assist Colorado case workers in determining eligibility for public assistance programs.",
        label="System message",
    ),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_inputs)

if __name__ == "__main__":
    demo.launch()
 
1
  import os
2
+ import shutil
3
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Function to prepare FAISS files for download
def prepare_faiss_files():
    """Copy the FAISS artifacts into /mnt/data so they appear in the Space's file browser.

    Returns:
        A status string describing whether the copy succeeded.
    """
    if os.path.exists("policy_embeddings.npy") and os.path.exists("faiss_index.bin"):
        # Fix: shutil.copy raises FileNotFoundError if the destination
        # directory does not exist, so create it first.
        os.makedirs("/mnt/data", exist_ok=True)
        shutil.copy("policy_embeddings.npy", "/mnt/data/policy_embeddings.npy")
        shutil.copy("faiss_index.bin", "/mnt/data/faiss_index.bin")
        return "βœ… FAISS files are ready for download. Go to the 'Files' tab in Hugging Face Space and download them."
    else:
        return "❌ FAISS files not found. Try running the chatbot first to generate them."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# Gradio UI: one button that stages the FAISS files for download.
# NOTE: component creation order is preserved — gr.Blocks renders in that order.
with gr.Blocks() as download_ui:
    gr.Markdown("### πŸ”½ Download FAISS Files")
    download_button = gr.Button("Prepare FAISS Files for Download")
    output_text = gr.Textbox()
    # Clicking runs prepare_faiss_files() and shows its status string.
    download_button.click(fn=prepare_faiss_files, outputs=output_text)

# Start the download interface.
download_ui.launch()