Update app.py
app.py
CHANGED
@@ -184,74 +184,72 @@ def delete_documents(selected_docs):
    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()

-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
-    print(f"Starting generate_chunked_response with {num_calls}
    full_response = ""
    messages = [{"role": "user", "content": prompt}]

    if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API
        for i in range(num_calls):
            print(f"Starting Cloudflare API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
            try:
-                        json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                        chunk = json_data['response']
-                        full_response += chunk
-                    except json.JSONDecodeError:
-                        continue
                print(f"Cloudflare API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Cloudflare: {str(e)}")
    else:
-        #
        client = InferenceClient(model, token=huggingface_token)

        for i in range(num_calls):
            print(f"Starting Hugging Face API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
            try:
-                for message in client.
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
-                    stream=True,
                ):
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        chunk = message.choices[0].delta.content
                        full_response += chunk
                print(f"Hugging Face API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Hugging Face: {str(e)}")

    # Clean up the response
    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
    clean_response = clean_response.replace("Using the following context:", "").strip()
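Both the old and the new version finish by stripping any echoed instruction wrapper out of the accumulated text with re.sub. A small, self-contained illustration of that cleanup step, run on an invented response string:

import re

raw = "<s>[INST] Summarize the PDF [/INST] The report covers Q3 revenue."
clean = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', raw, flags=re.DOTALL)
clean = clean.replace("Using the following context:", "").strip()
print(clean)  # -> The report covers Q3 revenue.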
@@ -272,7 +270,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
    final_response = '\n\n'.join(unique_paragraphs)

    print(f"Final clean response: {final_response[:100]}...")

def duckduckgo_search(query):
    with DDGS() as ddgs:
@@ -465,16 +463,15 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        yield "No documents available. Please upload PDF documents to answer questions."
        return

-    # New function to get documents by source
-    def get_documents_by_source(database, sources):
-        return [doc for doc in database.docstore._dict.values() if doc.metadata["source"] in sources]
    # Log all documents in the database

    # Get only the selected documents
-    selected_docs_content =
    logging.info(f"Number of selected documents: {len(selected_docs_content)}")

    if not selected_docs_content:
        logging.warning(f"No documents found for the selected sources: {selected_docs}")
@@ -482,11 +479,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        return

    # Perform similarity search on selected documents
-    relevant_docs =

    for doc in relevant_docs:
        logging.info(f"Document source: {doc.metadata['source']}")
@@ -575,7 +575,8 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=True)
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"

demo = gr.ChatInterface(
-    respond,
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
@@ -184,74 +184,72 @@ def delete_documents(selected_docs):
    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()

+async def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
+    print(f"Starting generate_chunked_response with model: {model}, num_calls: {num_calls}")
    full_response = ""
    messages = [{"role": "user", "content": prompt}]

    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Cloudflare API logic
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
        for i in range(num_calls):
            print(f"Starting Cloudflare API call {i+1}")
            try:
+                async with httpx.AsyncClient() as client:
+                    async with client.stream(
+                        "POST",
+                        f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                        json={
+                            "messages": messages,
+                            "stream": True,
+                            "max_tokens": max_tokens,
+                            "temperature": temperature
+                        },
+                        headers=headers
+                    ) as response:
+                        async for line in response.aiter_lines():
+                            if line.startswith("data: "):
+                                try:
+                                    json_data = json.loads(line[6:])
+                                    chunk = json_data.get('response', '')
+                                    full_response += chunk
+                                    yield full_response
+                                except json.JSONDecodeError:
+                                    continue
                print(f"Cloudflare API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Cloudflare: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far
    else:
+        # Hugging Face API logic
        client = InferenceClient(model, token=huggingface_token)

        for i in range(num_calls):
            print(f"Starting Hugging Face API call {i+1}")
            try:
+                async for message in client.chat_completion_stream(
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                ):
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        chunk = message.choices[0].delta.content
                        full_response += chunk
+                        yield full_response
                print(f"Hugging Face API call {i+1} completed")
            except Exception as e:
                print(f"Error in generating response from Hugging Face: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far

+    if not full_response:
+        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
+
    # Clean up the response
    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
    clean_response = clean_response.replace("Using the following context:", "").strip()
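In the updated Cloudflare branch above, the reply is streamed as server-sent events and rebuilt by stripping the "data: " prefix from each line before decoding the JSON payload. A minimal, self-contained sketch of that parsing step, assuming Cloudflare-style event lines and an end-of-stream "[DONE]" marker (the sample lines are invented for illustration):

import json

def parse_sse_lines(lines):
    # Accumulate the 'response' field from "data: {...}" event lines.
    full_response = ""
    for line in lines:
        if not line.startswith("data: "):
            continue
        payload = line[6:]  # drop the "data: " prefix, as the diff does
        if payload.strip() == "[DONE]":  # assumed end-of-stream sentinel
            break
        try:
            full_response += json.loads(payload).get("response", "")
        except json.JSONDecodeError:
            continue  # skip keep-alive or partial lines
    return full_response

sample = ['data: {"response": "Hel"}', 'data: {"response": "lo"}', 'data: [DONE]']
print(parse_sse_lines(sample))  # -> Hello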
@@ -272,7 +270,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
    final_response = '\n\n'.join(unique_paragraphs)

    print(f"Final clean response: {final_response[:100]}...")
+    yield final_response

def duckduckgo_search(query):
    with DDGS() as ddgs:
@@ -465,16 +463,15 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        yield "No documents available. Please upload PDF documents to answer questions."
        return

    # Log all documents in the database
+    all_docs = list(database.docstore._dict.values())
+    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in all_docs]}")
+    logging.info(f"Number of documents in database: {len(all_docs)}")

    # Get only the selected documents
+    selected_docs_content = [doc for doc in all_docs if doc.metadata["source"] in selected_docs]
    logging.info(f"Number of selected documents: {len(selected_docs_content)}")
+    logging.info(f"Selected documents: {[doc.metadata['source'] for doc in selected_docs_content]}")

    if not selected_docs_content:
        logging.warning(f"No documents found for the selected sources: {selected_docs}")
@@ -482,11 +479,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
        return

    # Perform similarity search on selected documents
+    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
+    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
+    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")

+    if not relevant_docs:
+        logging.warning("No relevant documents found after similarity search")
+        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+        return

    for doc in relevant_docs:
        logging.info(f"Document source: {doc.metadata['source']}")
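The similarity search added above restricts hits to the user-selected PDFs by filtering on each document's "source" metadata. A rough, self-contained sketch of that filtering idea in plain Python (the sample hits and file names are invented; the real code delegates this to the vector store's filter argument):

hits = [
    {"text": "chunk from the annual report", "metadata": {"source": "report.pdf"}},
    {"text": "chunk from meeting notes", "metadata": {"source": "notes.pdf"}},
    {"text": "another report chunk", "metadata": {"source": "report.pdf"}},
]
selected_docs = {"report.pdf"}

# Keep only hits whose source is among the selected documents,
# mirroring the lambda passed to similarity_search in the diff.
relevant = [h for h in hits if h["metadata"]["source"] in selected_docs]
print([h["text"] for h in relevant])  # -> ['chunk from the annual report', 'another report chunk']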
@@ -575,7 +575,8 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=True)
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"

demo = gr.ChatInterface(
+    fn=respond,
+    async_mode=True,
    additional_inputs=[
        gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
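gr.ChatInterface can drive a streaming handler: when fn is a generator (or async generator), each yielded value replaces the partial reply in the UI, which is how the incremental yields above can reach the user. A minimal, self-contained sketch under that assumption (the echo handler and launch call are illustrative only, not part of this commit):

import gradio as gr

def echo_stream(message, history):
    # Illustrative streaming handler: yield progressively longer replies.
    reply = ""
    for ch in f"You said: {message}":
        reply += ch
        yield reply

example_demo = gr.ChatInterface(fn=echo_stream)
example_demo.launch()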