tstone87 committed on
Commit
f073b54
·
verified ·
1 Parent(s): cfec7bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -73
app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import HfApi, hf_hub_download, login
8
 
9
  # 🔹 Hugging Face Repository Details
10
  HF_REPO_ID = "tstone87/repo" # Your dataset repo
11
- HF_TOKEN = os.getenv("HF_TOKEN") # Retrieve token securely
12
 
13
  if not HF_TOKEN:
14
  raise ValueError("❌ ERROR: Hugging Face token not found. Add it as a secret in the Hugging Face Space settings.")
@@ -37,79 +37,53 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
37
  chunk_size = 500
38
  chunks = [POLICY_TEXT[i:i+chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)] if POLICY_TEXT else []
39
 
40
- # 🔹 Function to Upload FAISS Files to Hugging Face Hub
41
- def upload_faiss_to_hf():
42
- api = HfApi()
43
-
44
- if os.path.exists(EMBEDDINGS_FILE):
45
- print("📤 Uploading FAISS embeddings to Hugging Face...")
46
- api.upload_file(
47
- path_or_fileobj=EMBEDDINGS_FILE,
48
- path_in_repo=EMBEDDINGS_FILE,
49
- repo_id=HF_REPO_ID,
50
- repo_type="dataset",
51
- token=HF_TOKEN,
52
- )
53
-
54
- if os.path.exists(INDEX_FILE):
55
- print("📤 Uploading FAISS index to Hugging Face...")
56
- api.upload_file(
57
- path_or_fileobj=INDEX_FILE,
58
- path_in_repo=INDEX_FILE,
59
- repo_id=HF_REPO_ID,
60
- repo_type="dataset",
61
- token=HF_TOKEN,
62
- )
63
-
64
- print("✅ FAISS files successfully uploaded to Hugging Face.")
65
-
66
- # 🔹 Function to Download FAISS Files from Hugging Face Hub if Missing
67
  def download_faiss_from_hf():
68
- if not os.path.exists(EMBEDDINGS_FILE):
69
- print("📥 Downloading FAISS embeddings from Hugging Face...")
70
- hf_hub_download(repo_id=HF_REPO_ID, filename=EMBEDDINGS_FILE, local_dir=".", token=HF_TOKEN)
 
71
 
72
- if not os.path.exists(INDEX_FILE):
73
- print("📥 Downloading FAISS index from Hugging Face...")
74
- hf_hub_download(repo_id=HF_REPO_ID, filename=INDEX_FILE, local_dir=".", token=HF_TOKEN)
75
 
76
- print("✅ FAISS files downloaded from Hugging Face.")
 
 
 
 
77
 
78
  # 🔹 Check if FAISS Files Exist, Otherwise Download or Generate
79
  if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
80
  print("✅ FAISS files found locally. Loading from disk...")
81
  embeddings = np.load(EMBEDDINGS_FILE)
82
  index = faiss.read_index(INDEX_FILE)
 
 
 
83
  else:
84
- print("🚀 FAISS files not found! Downloading from Hugging Face...")
85
- download_faiss_from_hf()
86
-
87
- if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
88
- embeddings = np.load(EMBEDDINGS_FILE)
89
- index = faiss.read_index(INDEX_FILE)
 
 
 
 
 
 
 
 
 
 
 
 
90
  else:
91
- print("🚀 No FAISS files found. Recomputing...")
92
- if chunks:
93
- embeddings = np.array([model.encode(chunk) for chunk in chunks])
94
-
95
- # Save embeddings for future use
96
- np.save(EMBEDDINGS_FILE, embeddings)
97
-
98
- # Use FAISS optimized index for faster lookup
99
- d = embeddings.shape[1]
100
- nlist = 10 # Number of clusters
101
- index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, nlist)
102
- index.train(embeddings)
103
- index.add(embeddings)
104
- index.nprobe = 2 # Speed optimization
105
-
106
- # Save FAISS index
107
- faiss.write_index(index, INDEX_FILE)
108
- upload_faiss_to_hf() # Upload FAISS files to Hugging Face
109
- print("✅ FAISS index created and saved.")
110
- else:
111
- print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
112
- index = None
113
 
114
  # 🔹 Function to Search FAISS
115
  def search_policy(query, top_k=3):
@@ -121,21 +95,83 @@ def search_policy(query, top_k=3):
121
 
122
  return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
123
 
124
- # 🔹 Gradio UI to Download FAISS Files
125
- def prepare_faiss_files():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
127
  shutil.copy(EMBEDDINGS_FILE, "/mnt/data/policy_embeddings.npy")
128
  shutil.copy(INDEX_FILE, "/mnt/data/faiss_index.bin")
129
- return "✅ FAISS files are ready for download. Go to the 'Files' tab in Hugging Face Space and download them."
130
  else:
131
- return "❌ FAISS files not found. Try running the chatbot first to generate them."
132
 
133
- with gr.Blocks() as download_ui:
134
- gr.Markdown("### 🔽 Download FAISS Files")
 
135
  download_button = gr.Button("Prepare FAISS Files for Download")
136
  output_text = gr.Textbox()
137
- download_button.click(fn=prepare_faiss_files, outputs=output_text)
138
-
139
- download_ui.launch()
140
 
141
- print("✅ FAISS index successfully loaded.")
 
 
 
8
 
9
  # 🔹 Hugging Face Repository Details
10
  HF_REPO_ID = "tstone87/repo" # Your dataset repo
11
+ HF_TOKEN = os.getenv("HF_TOKEN") # Secure API token
12
 
13
  if not HF_TOKEN:
14
  raise ValueError("❌ ERROR: Hugging Face token not found. Add it as a secret in the Hugging Face Space settings.")
 
37
  chunk_size = 500
38
  chunks = [POLICY_TEXT[i:i+chunk_size] for i in range(0, len(POLICY_TEXT), chunk_size)] if POLICY_TEXT else []
39
 
40
+ # 🔹 Function to Download FAISS Files from Hugging Face Hub if Available
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def download_faiss_from_hf():
42
+ try:
43
+ if not os.path.exists(EMBEDDINGS_FILE):
44
+ print("📥 Downloading FAISS embeddings from Hugging Face...")
45
+ hf_hub_download(repo_id=HF_REPO_ID, filename=EMBEDDINGS_FILE, local_dir=".", token=HF_TOKEN)
46
 
47
+ if not os.path.exists(INDEX_FILE):
48
+ print("📥 Downloading FAISS index from Hugging Face...")
49
+ hf_hub_download(repo_id=HF_REPO_ID, filename=INDEX_FILE, local_dir=".", token=HF_TOKEN)
50
 
51
+ print("✅ FAISS files downloaded from Hugging Face.")
52
+ return True
53
+ except Exception as e:
54
+ print(f"⚠️ FAISS files not found in Hugging Face repo. Recomputing... ({e})")
55
+ return False
56
 
57
  # 🔹 Check if FAISS Files Exist, Otherwise Download or Generate
58
  if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
59
  print("✅ FAISS files found locally. Loading from disk...")
60
  embeddings = np.load(EMBEDDINGS_FILE)
61
  index = faiss.read_index(INDEX_FILE)
62
+ elif download_faiss_from_hf():
63
+ embeddings = np.load(EMBEDDINGS_FILE)
64
+ index = faiss.read_index(INDEX_FILE)
65
  else:
66
+ print("🚀 No FAISS files found. Creating new index...")
67
+ if chunks:
68
+ embeddings = np.array([model.encode(chunk) for chunk in chunks])
69
+
70
+ # Save embeddings for future use
71
+ np.save(EMBEDDINGS_FILE, embeddings)
72
+
73
+ # Use FAISS optimized index for faster lookup
74
+ d = embeddings.shape[1]
75
+ nlist = 10 # Number of clusters
76
+ index = faiss.IndexIVFFlat(faiss.IndexFlatL2(d), d, nlist)
77
+ index.train(embeddings)
78
+ index.add(embeddings)
79
+ index.nprobe = 2 # Speed optimization
80
+
81
+ # Save FAISS index
82
+ faiss.write_index(index, INDEX_FILE)
83
+ print("✅ FAISS index created and saved.")
84
  else:
85
+ print("❌ ERROR: No text to index. Check combined_text_documents.txt.")
86
+ index = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  # 🔹 Function to Search FAISS
89
  def search_policy(query, top_k=3):
 
95
 
96
  return "\n\n".join([chunks[i] for i in indices[0] if i < len(chunks)])
97
 
98
+ # 🔹 Hugging Face LLM Client
99
+ from huggingface_hub import InferenceClient
100
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
101
+
102
+ # 🔹 Function to Handle Chat Responses
103
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
104
+ messages = [{"role": "system", "content": system_message}]
105
+
106
+ for val in history:
107
+ if val[0]:
108
+ messages.append({"role": "user", "content": val[0]})
109
+ if val[1]:
110
+ messages.append({"role": "assistant", "content": val[1]})
111
+
112
+ # 🔹 Retrieve relevant policy info from FAISS
113
+ policy_context = search_policy(message)
114
+
115
+ if policy_context:
116
+ # 🔹 Display retrieved context in chat
117
+ messages.append({"role": "assistant", "content": f"📄 **Relevant Policy Context:**\n\n{policy_context}"})
118
+
119
+ # 🔹 Force the LLM to use the retrieved policy text
120
+ user_query_with_context = f"""
121
+ The following is the most relevant policy information retrieved from the official Colorado public assistance policies:
122
+
123
+ {policy_context}
124
+
125
+ Based on this information, answer the following question:
126
+ {message}
127
+ """
128
+ messages.append({"role": "user", "content": user_query_with_context})
129
+ else:
130
+ # If no relevant policy info is found, use the original message
131
+ messages.append({"role": "user", "content": message})
132
+
133
+ response = ""
134
+ for message in client.chat_completion(
135
+ messages,
136
+ max_tokens=max_tokens,
137
+ stream=True,
138
+ temperature=temperature,
139
+ top_p=top_p,
140
+ ):
141
+ token = message.choices[0].delta.content
142
+ response += token
143
+ yield response
144
+
145
+ # 🔹 Gradio Chat Interface
146
+ demo = gr.ChatInterface(
147
+ respond,
148
+ additional_inputs=[
149
+ gr.Textbox(
150
+ value="You are a knowledgeable and professional chatbot designed to assist Colorado case workers in determining eligibility for public assistance programs.",
151
+ label="System message"
152
+ ),
153
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
154
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
155
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
156
+ ],
157
+ )
158
+
159
+ # 🔹 Function to Provide FAISS Files for Download
160
+ def download_faiss_files():
161
  if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(INDEX_FILE):
162
  shutil.copy(EMBEDDINGS_FILE, "/mnt/data/policy_embeddings.npy")
163
  shutil.copy(INDEX_FILE, "/mnt/data/faiss_index.bin")
164
+ return "✅ FAISS files ready for download! Check the 'Files' tab in your Hugging Face Space."
165
  else:
166
+ return "❌ FAISS files not found. Run the chatbot first to generate them."
167
 
168
+ # Gradio button for downloading FAISS files
169
+ with gr.Blocks() as file_download:
170
+ gr.Markdown("### 🔽 Download FAISS Files to Your Computer")
171
  download_button = gr.Button("Prepare FAISS Files for Download")
172
  output_text = gr.Textbox()
173
+ download_button.click(fn=download_faiss_files, outputs=output_text)
 
 
174
 
175
+ if __name__ == "__main__":
176
+ demo.launch()
177
+ file_download.launch()