Shreyas094 committed
Commit 43ce500 · verified · 1 Parent(s): c702374

Update app.py

Files changed (1):
  1. app.py +54 -53
app.py CHANGED
@@ -184,74 +184,72 @@ def delete_documents(selected_docs):
 
     return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
 
-def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
-    print(f"Starting generate_chunked_response with {num_calls} calls")
+async def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2):
+    print(f"Starting generate_chunked_response with model: {model}, num_calls: {num_calls}")
     full_response = ""
     messages = [{"role": "user", "content": prompt}]
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API
+        # Cloudflare API logic
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
         for i in range(num_calls):
             print(f"Starting Cloudflare API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
             try:
-                response = requests.post(
-                    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
-                    headers={"Authorization": f"Bearer {API_TOKEN}"},
-                    json={
-                        "stream": true,
-                        "messages": [
-                            {"role": "system", "content": "You are a friendly assistant"},
-                            {"role": "user", "content": prompt}
-                        ],
-                        "max_tokens": max_tokens,
-                        "temperature": temperature
-                    },
-                    stream=true
-                )
-
-                for line in response.iter_lines():
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
-                    if line:
-                        try:
-                            json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                            chunk = json_data['response']
-                            full_response += chunk
-                        except json.JSONDecodeError:
-                            continue
+                async with httpx.AsyncClient() as client:
+                    async with client.stream(
+                        "POST",
+                        f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                        json={
+                            "messages": messages,
+                            "stream": True,
+                            "max_tokens": max_tokens,
+                            "temperature": temperature
+                        },
+                        headers=headers
+                    ) as response:
+                        async for line in response.aiter_lines():
+                            if line.startswith("data: "):
+                                try:
+                                    json_data = json.loads(line[6:])
+                                    chunk = json_data.get('response', '')
+                                    full_response += chunk
+                                    yield full_response
+                                except json.JSONDecodeError:
+                                    continue
                 print(f"Cloudflare API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Cloudflare: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far
     else:
-        # Original Hugging Face API logic
+        # Hugging Face API logic
         client = InferenceClient(model, token=huggingface_token)
 
         for i in range(num_calls):
             print(f"Starting Hugging Face API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
             try:
-                for message in client.chat_completion(
+                async for message in client.chat_completion_stream(
                     messages=messages,
                     max_tokens=max_tokens,
                     temperature=temperature,
-                    stream=True,
                 ):
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
+                        yield full_response
                 print(f"Hugging Face API call {i+1} completed")
             except Exception as e:
                 print(f"Error in generating response from Hugging Face: {str(e)}")
+                if i == num_calls - 1:  # If this is the last attempt
+                    yield full_response  # Yield whatever response we have so far
+
+    if not full_response:
+        yield "I apologize, but I couldn't generate a response at this time. Please try again later."
 
     # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
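Note on the Hugging Face branch above: as far as I know, huggingface_hub's InferenceClient has no chat_completion_stream method; streaming is requested via chat_completion(..., stream=True), and iterating with `async for` needs the async client. A minimal sketch of that branch under those assumptions (stream_chat is a hypothetical helper, not part of app.py):

from huggingface_hub import AsyncInferenceClient

async def stream_chat(model, messages, max_tokens, temperature, token):
    # Hypothetical helper; assumes AsyncInferenceClient is available in
    # the installed huggingface_hub version.
    client = AsyncInferenceClient(model, token=token)
    stream = await client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,  # streaming flag instead of a separate *_stream method
    )
    full_response = ""
    async for message in stream:
        delta = message.choices[0].delta
        if delta and delta.content:
            full_response += delta.content
            yield full_response  # cumulative text, matching the diff's contract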
@@ -272,7 +270,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-    return final_response
+    yield final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
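Because generate_chunked_response now yields instead of returning, callers drain the async generator and keep the last value. A minimal consumption sketch (the prompt and model strings are placeholders; assumes the function above is in scope):

import asyncio

async def main():
    final = ""
    # Each yield carries the accumulated response so far, so the last
    # value seen is the complete answer.
    async for partial in generate_chunked_response(
        "Summarize the uploaded PDFs.", "@cf/meta/llama-3.1-8b-instruct"
    ):
        final = partial
    print(final)

asyncio.run(main())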
@@ -465,16 +463,15 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         yield "No documents available. Please upload PDF documents to answer questions."
         return
 
-    # New function to get documents by source
-    def get_documents_by_source(database, sources):
-        return [doc for doc in database.docstore._dict.values() if doc.metadata["source"] in sources]
-
     # Log all documents in the database
-    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in database.docstore._dict.values()]}")
+    all_docs = list(database.docstore._dict.values())
+    logging.info(f"All documents in database: {[doc.metadata['source'] for doc in all_docs]}")
+    logging.info(f"Number of documents in database: {len(all_docs)}")
 
     # Get only the selected documents
-    selected_docs_content = get_documents_by_source(database, selected_docs)
+    selected_docs_content = [doc for doc in all_docs if doc.metadata["source"] in selected_docs]
     logging.info(f"Number of selected documents: {len(selected_docs_content)}")
+    logging.info(f"Selected documents: {[doc.metadata['source'] for doc in selected_docs_content]}")
 
     if not selected_docs_content:
         logging.warning(f"No documents found for the selected sources: {selected_docs}")
@@ -482,11 +479,14 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         return
 
     # Perform similarity search on selected documents
-    relevant_docs = sorted(selected_docs_content,
-                           key=lambda doc: database.similarity_search_with_score(query, k=1, filter={"source": doc.metadata["source"]})[0][1],
-                           reverse=True)
+    relevant_docs = database.similarity_search(query, k=5, filter=lambda doc: doc.metadata["source"] in selected_docs)
+    logging.info(f"Number of relevant documents after similarity search: {len(relevant_docs)}")
+    logging.info(f"Relevant documents: {[doc.metadata['source'] for doc in relevant_docs]}")
 
-    logging.info(f"Number of relevant documents: {len(relevant_docs)}")
+    if not relevant_docs:
+        logging.warning("No relevant documents found after similarity search")
+        yield "No relevant information found in the selected documents. Please try rephrasing your query."
+        return
 
     for doc in relevant_docs:
         logging.info(f"Document source: {doc.metadata['source']}")
@@ -575,7 +575,8 @@ use_web_search = gr.Checkbox(label="Use Web Search", value=True)
 custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
 
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
+    async_mode=True,
     additional_inputs=[
         gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
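A wiring note: the Gradio releases I'm aware of don't document an async_mode flag on gr.ChatInterface; async (generator) callbacks are detected automatically. A minimal sketch under that assumption, with respond simplified to the standard message/history signature (the app's real respond also receives the additional inputs, and MODELS comes from app.py):

import gradio as gr

async def respond(message, history):
    # Relay the cumulative chunks yielded by the async generator above.
    async for partial in generate_chunked_response(message, MODELS[3]):
        yield partial

demo = gr.ChatInterface(fn=respond)
demo.launch()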
 