Shreyas094 committed on
Commit ecb9aad · verified · 1 Parent(s): cb9f424

Update app.py

Files changed (1)
  1. app.py +159 -268
app.py CHANGED
@@ -16,7 +16,6 @@ from huggingface_hub import InferenceClient
 import inspect
 import logging
 
-
 # Set up basic configuration for logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
@@ -88,7 +87,6 @@ def update_vectors(files, parser):
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
         all_data.extend(data)
         total_chunks += len(data)
-        # Append new documents instead of replacing
         if not any(doc["name"] == file.name for doc in uploaded_documents):
             uploaded_documents.append({"name": file.name, "selected": True})
             logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -116,96 +114,6 @@ def update_vectors(files, parser):
         label="Select documents to query"
     )
 
-def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
-    print(f"Starting generate_chunked_response with {num_calls} calls")
-    full_response = ""
-    messages = [{"role": "user", "content": prompt}]
-
-    if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Cloudflare API
-        for i in range(num_calls):
-            print(f"Starting Cloudflare API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
-            try:
-                response = requests.post(
-                    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
-                    headers={"Authorization": f"Bearer {API_TOKEN}"},
-                    json={
-                        "stream": true,
-                        "messages": [
-                            {"role": "system", "content": "You are a friendly assistant"},
-                            {"role": "user", "content": prompt}
-                        ],
-                        "max_tokens": max_tokens,
-                        "temperature": temperature
-                    },
-                    stream=true
-                )
-
-                for line in response.iter_lines():
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
-                    if line:
-                        try:
-                            json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                            chunk = json_data['response']
-                            full_response += chunk
-                        except json.JSONDecodeError:
-                            continue
-                print(f"Cloudflare API call {i+1} completed")
-            except Exception as e:
-                print(f"Error in generating response from Cloudflare: {str(e)}")
-    else:
-        # Original Hugging Face API logic
-        client = InferenceClient(model, token=huggingface_token)
-
-        for i in range(num_calls):
-            print(f"Starting Hugging Face API call {i+1}")
-            if should_stop:
-                print("Stop clicked, breaking loop")
-                break
-            try:
-                for message in client.chat_completion(
-                    messages=messages,
-                    max_tokens=max_tokens,
-                    temperature=temperature,
-                    stream=True,
-                ):
-                    if should_stop:
-                        print("Stop clicked during streaming, breaking")
-                        break
-                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                        chunk = message.choices[0].delta.content
-                        full_response += chunk
-                print(f"Hugging Face API call {i+1} completed")
-            except Exception as e:
-                print(f"Error in generating response from Hugging Face: {str(e)}")
-
-    # Clean up the response
-    clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
-    clean_response = clean_response.replace("Using the following context:", "").strip()
-    clean_response = clean_response.replace("Using the following context from the PDF documents:", "").strip()
-
-    # Remove duplicate paragraphs and sentences
-    paragraphs = clean_response.split('\n\n')
-    unique_paragraphs = []
-    for paragraph in paragraphs:
-        if paragraph not in unique_paragraphs:
-            sentences = paragraph.split('. ')
-            unique_sentences = []
-            for sentence in sentences:
-                if sentence not in unique_sentences:
-                    unique_sentences.append(sentence)
-            unique_paragraphs.append('. '.join(unique_sentences))
-
-    final_response = '\n\n'.join(unique_paragraphs)
-
-    print(f"Final clean response: {final_response[:100]}...")
-    return final_response
-
 def duckduckgo_search(query):
     with DDGS() as ddgs:
         results = ddgs.text(query, max_results=5)
@@ -217,72 +125,6 @@ class CitingSources(BaseModel):
         description="List of sources to cite. Should be an URL of the source."
     )
 
-def retry_last_response(history, use_web_search, model, temperature, num_calls):
-    if not history:
-        return history
-
-    last_user_msg = history[-1][0]
-    history = history[:-1] # Remove the last response
-
-    return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
-
-def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
-    logging.info(f"User Query: {message}")
-    logging.info(f"Model Used: {model}")
-    logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
-
-    logging.info(f"Selected Documents: {selected_docs}")
-
-    try:
-        if use_web_search:
-            for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
-                response = f"{main_content}\n\n{sources}"
-                first_line = response.split('\n')[0] if response else ''
-                logging.info(f"Generated Response (first line): {first_line}")
-                yield response
-        else:
-            embed = get_embeddings()
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-                retriever = database.as_retriever()
-
-                # Filter relevant documents based on user selection
-                all_relevant_docs = retriever.get_relevant_documents(message)
-                relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
-
-                if not relevant_docs:
-                    yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
-                    return
-
-                context_str = "\n".join([doc.page_content for doc in relevant_docs])
-            else:
-                context_str = "No documents available."
-                yield "No documents available. Please upload PDF documents to answer questions."
-                return
-
-        if model == "@cf/meta/llama-3.1-8b-instruct":
-            # Use Cloudflare API
-            for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-                first_line = partial_response.split('\n')[0] if partial_response else ''
-                logging.info(f"Generated Response (first line): {first_line}")
-                yield partial_response
-        else:
-            # Use Hugging Face API
-            for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
-                first_line = partial_response.split('\n')[0] if partial_response else ''
-                logging.info(f"Generated Response (first line): {first_line}")
-                yield partial_response
-    except Exception as e:
-        logging.error(f"Error with {model}: {str(e)}")
-        if "microsoft/Phi-3-mini-4k-instruct" in model:
-            logging.info("Falling back to Mistral model due to Phi-3 error")
-            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-        else:
-            yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
-
-logging.basicConfig(level=logging.DEBUG)
-
 def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
     headers = {
         "Authorization": f"Bearer {API_TOKEN}",
@@ -337,6 +179,37 @@ After writing the document, please provide a list of sources used in your response.
     if not full_response:
         yield "I apologize, but I couldn't generate a response at this time. Please try again later."
 
+def get_response_with_search(query, model, num_calls=3, temperature=0.2):
+    search_results = duckduckgo_search(query)
+    context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
+                        for result in search_results if 'body' in result)
+
+    prompt = f"""Using the following context:
+{context}
+Write a detailed and complete research document that fulfills the following user request: '{query}'
+After writing the document, please provide a list of sources used in your response."""
+
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Use Cloudflare API
+        for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
+            yield response, "" # Yield streaming response without sources
+    else:
+        # Use Hugging Face API
+        client = InferenceClient(model, token=huggingface_token)
+
+        main_content = ""
+        for i in range(num_calls):
+            for message in client.chat_completion(
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=1000,
+                temperature=temperature,
+                stream=True,
+            ):
+                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                    chunk = message.choices[0].delta.content
+                    main_content += chunk
+                    yield main_content, "" # Yield partial main content without sources
+
 def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
     logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
 
@@ -354,6 +227,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
     relevant_docs = retriever.get_relevant_documents(query)
     logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
 
+    # Filter relevant_docs based on selected documents
     filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
     logging.info(f"Number of filtered documents: {len(filtered_docs)}")
 
@@ -362,24 +236,28 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
         yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
 
+    for doc in filtered_docs:
+        logging.info(f"Document source: {doc.metadata['source']}")
+        logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
+
     context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")
 
-    full_response = ""
-
     if model == "@cf/meta/llama-3.1-8b-instruct":
         logging.info("Using Cloudflare API")
+        # Use Cloudflare API with the retrieved context
        for response in get_response_from_cloudflare(prompt="", context=context_str, query=query, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-            full_response += response
-            yield full_response
+            yield response
     else:
         logging.info("Using Hugging Face API")
+        # Use Hugging Face API
         prompt = f"""Using the following context from the PDF documents:
 {context_str}
 Write a detailed and complete response that answers the following user question: '{query}'"""
 
         client = InferenceClient(model, token=huggingface_token)
 
+        response = ""
         for i in range(num_calls):
             logging.info(f"API call {i+1}/{num_calls}")
             for message in client.chat_completion(
@@ -390,51 +268,36 @@ Write a detailed and complete response that answers the following user question:
             ):
                 if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                     chunk = message.choices[0].delta.content
-                    full_response += chunk
-                    yield full_response
-
-    logging.info("Finished generating initial response")
+                    response += chunk
+                    yield response # Yield partial response
+
+    logging.info("Finished generating response")
 
-def get_response_with_search(query, model, num_calls=3, temperature=0.2):
-    search_results = duckduckgo_search(query)
-    context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
-                        for result in search_results if 'body' in result)
-
-    prompt = f"""Using the following context:
+def continue_response(last_response, context, query, model, temperature):
+    prompt = f"""Using the following context and partial response:
+
+Context:
 {context}
-Write a detailed and complete research document that fulfills the following user request: '{query}'
-After writing the document, please provide a list of sources used in your response."""
 
-    full_response = ""
+Partial Response:
+{last_response}
+
+Continue the response to fully answer the query: '{query}'
+Make sure the continuation flows smoothly from the previous part."""
 
     if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Use Cloudflare API
-        for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
-            full_response += response
-            yield full_response, "" # Yield streaming response without sources
+        return get_response_from_cloudflare(prompt="", context=context, query=prompt, num_calls=1, temperature=temperature, search_type="pdf")
     else:
-        # Use Hugging Face API
        client = InferenceClient(model, token=huggingface_token)
-
-        for i in range(num_calls):
-            for message in client.chat_completion(
-                messages=[{"role": "user", "content": prompt}],
-                max_tokens=1000,
-                temperature=temperature,
-                stream=True,
-            ):
-                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                    chunk = message.choices[0].delta.content
-                    full_response += chunk
-                    yield full_response, "" # Yield partial main content without sources
-
-    logging.info("Finished generating initial response")
+        for message in client.chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=1000,
+            temperature=temperature,
+            stream=True,
+        ):
+            if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                yield message.choices[0].delta.content
 
-def vote(data: gr.LikeData):
-    if data.liked:
-        print(f"You upvoted this response: {data.value}")
-    else:
-        print(f"You downvoted this response: {data.value}")
 def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
     if not message.strip():
         return "", history
@@ -442,15 +305,16 @@ def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
     history = history + [(message, "")]
 
     try:
-        if use_web_search:
-            for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
-                response = f"{main_content}\n\n{sources}"
-                history[-1] = (message, response)
-                yield history
-        else:
-            for response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
-                history[-1] = (message, response)
-                yield history
+        last_response = ""
+        for response in respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
+            last_response = response
+            history[-1] = (message, response)
+            yield history
+
+        # Check if the response seems truncated
+        if not last_response.strip().endswith((".", "!", "?")):
+            history.append((None, "Response may be incomplete. Type 'continue' to generate more."))
+            yield history
     except gr.CancelledError:
         yield history
     except Exception as e:
@@ -460,50 +324,88 @@ def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
 
 def continue_generation(history, use_web_search, model, temperature, selected_docs):
     if not history:
-        return history
-
-    last_user_msg = history[-1][0]
-    previous_response = history[-1][1]
-
-    try:
-        if use_web_search:
-            search_results = duckduckgo_search(last_user_msg)
-            context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
-                                for result in search_results if 'body' in result)
-        else:
-            embed = get_embeddings()
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-                retriever = database.as_retriever()
-                relevant_docs = retriever.get_relevant_documents(last_user_msg)
-                filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
-                context = "\n".join([doc.page_content for doc in filtered_docs])
-            else:
-                return history
-
-        prompt = f"""Using the following context and partial response, please continue and complete the response:
+        return history, gr.Button.update(visible=False)
 
-Context:
-{context}
+    last_message = history[-1][0]
+    last_response = history[-1][1]
+
+    if use_web_search:
+        search_results = duckduckgo_search(last_message)
+        context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
+                            for result in search_results if 'body' in result)
+    else:
+        embed = get_embeddings()
+        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+        retriever = database.as_retriever()
+        relevant_docs = retriever.get_relevant_documents(last_message)
+        filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
+        context = "\n".join([doc.page_content for doc in filtered_docs])
+
+    continuation = ""
+    for chunk in continue_response(last_response, context, last_message, model, temperature):
+        continuation += chunk
+        history[-1] = (last_message, last_response + continuation)
+        yield history, gr.Button.update(visible=True)
+
+    if not (last_response + continuation).strip().endswith((".", "!", "?")):
+        yield history, gr.Button.update(visible=True, text="Continue Generation")
+    else:
+        yield history, gr.Button.update(visible=False)
 
-Query: {last_user_msg}
+def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
+    logging.info(f"User Query: {message}")
+    logging.info(f"Model Used: {model}")
+    logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+    logging.info(f"Selected Documents: {selected_docs}")
 
-Partial Response:
-{previous_response}
+    # Check if the user wants to continue the previous response
+    if message.strip().lower() == "continue" and history:
+        last_message = history[-2][0] # Get the last user message
+        last_response = history[-2][1] # Get the last bot response
+        context = get_context(last_message, use_web_search, selected_docs)
+        for continuation in continue_response(last_response, context, last_message, model, temperature):
+            yield last_response + continuation
+    else:
+        try:
+            if use_web_search:
+                for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
+                    response = f"{main_content}\n\n{sources}"
+                    first_line = response.split('\n')[0] if response else ''
+                    logging.info(f"Generated Response (first line): {first_line}")
+                    yield response
+            else:
+                for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
+                    first_line = partial_response.split('\n')[0] if partial_response else ''
+                    logging.info(f"Generated Response (first line): {first_line}")
+                    yield partial_response
+        except Exception as e:
+            logging.error(f"Error with {model}: {str(e)}")
+            if "microsoft/Phi-3-mini-4k-instruct" in model:
+                logging.info("Falling back to Mistral model due to Phi-3 error")
+                fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
+                yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
+            else:
+                yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
-Please continue the response from where it was cut off:"""
+def get_context(message, use_web_search, selected_docs):
+    if use_web_search:
+        search_results = duckduckgo_search(message)
+        return "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
+                         for result in search_results if 'body' in result)
+    else:
+        embed = get_embeddings()
+        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+        retriever = database.as_retriever()
+        relevant_docs = retriever.get_relevant_documents(message)
+        filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
+        return "\n".join([doc.page_content for doc in filtered_docs])
 
-        continued_response = previous_response
-        for chunk in get_response_from_cloudflare(prompt=prompt, context="", query="", num_calls=1, temperature=temperature, search_type="continuation"):
-            continued_response += chunk
-            history[-1] = (last_user_msg, continued_response)
-            yield history
-    except gr.CancelledError:
-        yield history
-    except Exception as e:
-        logging.error(f"Unexpected error in continue_generation: {str(e)}")
-        history[-1] = (last_user_msg, f"{previous_response}\n\nAn error occurred while continuing generation: {str(e)}")
-        yield history
+
+def vote(data: gr.LikeData):
+    if data.liked:
+        print(f"You upvoted this response: {data.value}")
+    else:
+        print(f"You downvoted this response: {data.value}")
 
 css = """
 /* Add your custom CSS here */
@@ -518,7 +420,9 @@ def display_documents():
         label="Select documents to query"
     )
 
+# Define the checkbox outside the demo block
 document_selector = gr.CheckboxGroup(label="Select documents to query")
+
 use_web_search = gr.Checkbox(label="Use Web Search", value=False)
 
 demo = gr.ChatInterface(
@@ -528,14 +432,10 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
         use_web_search,
-        document_selector
-    ],
-    additional_buttons=[
-        gr.Button("Continue Generation"),
-        gr.Button("Upload Document")
+        document_selector # Add the document selector to the chat interface
     ],
     title="AI-powered Web Search and PDF Chat Assistant",
-    description="Chat with your PDFs or use web search to answer questions.",
+    description="Chat with your PDFs or use web search to answer questions. Type 'continue' to generate more if a response seems incomplete.",
     theme=gr.themes.Soft(
        primary_hue="orange",
        secondary_hue="amber",
@@ -567,26 +467,17 @@ demo = gr.ChatInterface(
 # Add file upload functionality
 with demo:
     gr.Markdown("## Upload PDF Documents")
-
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
+        update_button = gr.Button("Upload Document")
 
     update_output = gr.Textbox(label="Update Status")
 
     # Update both the output text and the document selector
-    demo.additional_buttons[1].click(
-        update_vectors,
-        inputs=[file_input, parser_dropdown],
-        outputs=[update_output, document_selector]
-    )
-
-    # Set up the continue generation button
-    demo.additional_buttons[0].click(
-        continue_generation,
-        inputs=[demo.chatbot, use_web_search, demo.additional_inputs[0], demo.additional_inputs[1], document_selector],
-        outputs=demo.chatbot
-    )
+    update_button.click(update_vectors,
+                        inputs=[file_input, parser_dropdown],
+                        outputs=[update_output, document_selector])
 
     gr.Markdown(
     """
@@ -597,8 +488,8 @@ with demo:
    4. Ask questions in the chat interface.
    5. Toggle "Use Web Search" to switch between PDF chat and web search.
    6. Adjust Temperature and Number of API Calls to fine-tune the response generation.
-   7. Use the "Continue Generation" button if you want to extend the last response.
-   8. Use the provided examples or ask your own questions.
+   7. Use the provided examples or ask your own questions.
+   8. If a response seems incomplete, type 'continue' to generate more.
    """
    )
 