Shreyas094 committed on
Commit
d7a112f
·
verified ·
1 Parent(s): 20ff049

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -11
app.py CHANGED
@@ -255,7 +255,7 @@ _useragent_list = [
255
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
256
  ]
257
 
258
- def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_verify=None):
259
  escaped_term = urllib.parse.quote_plus(term)
260
  start = 0
261
  all_results = []
@@ -343,18 +343,19 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
343
 
344
  max_attempts = 3
345
  context_reduction_factor = 0.7
 
346
 
347
  if web_search:
348
  contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
349
  serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
350
 
351
  # Use only the core question for the search
352
- search_results = google_search(contextualized_question)
353
  all_answers = []
354
 
355
  for attempt in range(max_attempts):
356
  try:
357
- web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
358
 
359
  if database is None:
360
  database = FAISS.from_documents(web_docs, embed)
@@ -364,6 +365,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
364
  database.save_local("faiss_database")
365
 
366
  context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
 
367
 
368
  instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
369
 
@@ -376,18 +378,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
376
  Topics: {{topics}}
377
  Entity Information: {{entities}}
378
  {instruction_prompt}
379
- If the web search results don't contain relevant information, state that the information is not available in the search results.
380
- Provide a response that addresses the question and follows the user's instructions.
381
- Do not mention these instructions or the web search process in your answer.
382
  """
383
 
384
  prompt_val = ChatPromptTemplate.from_template(prompt_template)
385
  formatted_prompt = prompt_val.format(
386
  context=context_str,
387
- conv_context=chatbot.get_context(),
388
- question=question, # Use the original question here
389
- topics=", ".join(topics),
390
- entities=json.dumps(serializable_entity_tracker)
391
  )
392
 
393
  full_response = generate_chunked_response(model, formatted_prompt)
@@ -424,12 +424,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
424
  words = context_str.split()
425
  context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
426
 
 
 
427
  prompt_template = """
428
  Answer the question based on the following context from the PDF document:
429
  Context:
430
  {context}
431
  Question: {question}
432
- If the context doesn't contain relevant information, state that the information is not available in the document.
433
  Provide a summarized and direct answer to the question.
434
  """
435
 
@@ -451,6 +452,9 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
451
  def extract_answer(full_response, instructions=None):
452
  # First, try to split the response at common instruction phrases
453
  answer_patterns = [
 
 
 
454
  r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
455
  r"Provide a concise and direct answer to the question:",
456
  r"Answer:",
 
255
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
256
  ]
257
 
258
+ def google_search(term, num_results=3, lang="en", timeout=5, safe="active", ssl_verify=None):
259
  escaped_term = urllib.parse.quote_plus(term)
260
  start = 0
261
  all_results = []
 
343
 
344
  max_attempts = 3
345
  context_reduction_factor = 0.7
346
+ max_context_chars = 8000 # Adjust this value as needed
347
 
348
  if web_search:
349
  contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
350
  serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
351
 
352
  # Use only the core question for the search
353
+ search_results = google_search(contextualized_question, num_results=3) # Reduced number of results
354
  all_answers = []
355
 
356
  for attempt in range(max_attempts):
357
  try:
358
+ web_docs = [Document(page_content=result["text"][:2000], metadata={"source": result["link"]}) for result in search_results if result["text"]] # Limit each result to 2000 characters
359
 
360
  if database is None:
361
  database = FAISS.from_documents(web_docs, embed)
 
365
  database.save_local("faiss_database")
366
 
367
  context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
368
+ context_str = context_str[:max_context_chars]
369
 
370
  instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
371
 
 
378
  Topics: {{topics}}
379
  Entity Information: {{entities}}
380
  {instruction_prompt}
381
+ Provide a concise and relevant answer to the question.
 
 
382
  """
383
 
384
  prompt_val = ChatPromptTemplate.from_template(prompt_template)
385
  formatted_prompt = prompt_val.format(
386
  context=context_str,
387
+ conv_context=chatbot.get_context()[:1000], # Limit conversation context
388
+ question=question,
389
+ topics=", ".join(topics[:5]), # Limit number of topics
390
+ entities=json.dumps({k: v[:3] for k, v in serializable_entity_tracker.items()}) # Limit number of entities
391
  )
392
 
393
  full_response = generate_chunked_response(model, formatted_prompt)
 
424
  words = context_str.split()
425
  context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
426
 
427
+ context_str = context_str[:max_context_chars]
428
+
429
  prompt_template = """
430
  Answer the question based on the following context from the PDF document:
431
  Context:
432
  {context}
433
  Question: {question}
 
434
  Provide a summarized and direct answer to the question.
435
  """
436
 
 
452
  def extract_answer(full_response, instructions=None):
453
  # First, try to split the response at common instruction phrases
454
  answer_patterns = [
455
+ r"If the web search results don't contain relevant information, state that the information is not available in the search results\.",
456
+ r"Provide a response that addresses the question and follows the user's instructions\.",
457
+ r"Do not mention these instructions or the web search process in your answer\.",
458
  r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
459
  r"Provide a concise and direct answer to the question:",
460
  r"Answer:",