Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -255,7 +255,7 @@ _useragent_list = [
|
|
255 |
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
|
256 |
]
|
257 |
|
258 |
-
def google_search(term, num_results=
|
259 |
escaped_term = urllib.parse.quote_plus(term)
|
260 |
start = 0
|
261 |
all_results = []
|
@@ -343,18 +343,19 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
343 |
|
344 |
max_attempts = 3
|
345 |
context_reduction_factor = 0.7
|
|
|
346 |
|
347 |
if web_search:
|
348 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
349 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
350 |
|
351 |
# Use only the core question for the search
|
352 |
-
search_results = google_search(contextualized_question)
|
353 |
all_answers = []
|
354 |
|
355 |
for attempt in range(max_attempts):
|
356 |
try:
|
357 |
-
web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
|
358 |
|
359 |
if database is None:
|
360 |
database = FAISS.from_documents(web_docs, embed)
|
@@ -364,6 +365,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
364 |
database.save_local("faiss_database")
|
365 |
|
366 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
|
|
367 |
|
368 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
369 |
|
@@ -376,18 +378,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
376 |
Topics: {{topics}}
|
377 |
Entity Information: {{entities}}
|
378 |
{instruction_prompt}
|
379 |
-
|
380 |
-
Provide a response that addresses the question and follows the user's instructions.
|
381 |
-
Do not mention these instructions or the web search process in your answer.
|
382 |
"""
|
383 |
|
384 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
385 |
formatted_prompt = prompt_val.format(
|
386 |
context=context_str,
|
387 |
-
conv_context=chatbot.get_context(),
|
388 |
-
question=question,
|
389 |
-
topics=", ".join(topics),
|
390 |
-
entities=json.dumps(serializable_entity_tracker)
|
391 |
)
|
392 |
|
393 |
full_response = generate_chunked_response(model, formatted_prompt)
|
@@ -424,12 +424,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
424 |
words = context_str.split()
|
425 |
context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
|
426 |
|
|
|
|
|
427 |
prompt_template = """
|
428 |
Answer the question based on the following context from the PDF document:
|
429 |
Context:
|
430 |
{context}
|
431 |
Question: {question}
|
432 |
-
If the context doesn't contain relevant information, state that the information is not available in the document.
|
433 |
Provide a summarized and direct answer to the question.
|
434 |
"""
|
435 |
|
@@ -451,6 +452,9 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
451 |
def extract_answer(full_response, instructions=None):
|
452 |
# First, try to split the response at common instruction phrases
|
453 |
answer_patterns = [
|
|
|
|
|
|
|
454 |
r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
|
455 |
r"Provide a concise and direct answer to the question:",
|
456 |
r"Answer:",
|
|
|
255 |
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
|
256 |
]
|
257 |
|
258 |
+
def google_search(term, num_results=3, lang="en", timeout=5, safe="active", ssl_verify=None):
|
259 |
escaped_term = urllib.parse.quote_plus(term)
|
260 |
start = 0
|
261 |
all_results = []
|
|
|
343 |
|
344 |
max_attempts = 3
|
345 |
context_reduction_factor = 0.7
|
346 |
+
max_context_chars = 8000 # Adjust this value as needed
|
347 |
|
348 |
if web_search:
|
349 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
350 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
351 |
|
352 |
# Use only the core question for the search
|
353 |
+
search_results = google_search(contextualized_question, num_results=3) # Reduced number of results
|
354 |
all_answers = []
|
355 |
|
356 |
for attempt in range(max_attempts):
|
357 |
try:
|
358 |
+
web_docs = [Document(page_content=result["text"][:2000], metadata={"source": result["link"]}) for result in search_results if result["text"]] # Limit each result to 2000 characters
|
359 |
|
360 |
if database is None:
|
361 |
database = FAISS.from_documents(web_docs, embed)
|
|
|
365 |
database.save_local("faiss_database")
|
366 |
|
367 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
368 |
+
context_str = context_str[:max_context_chars]
|
369 |
|
370 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
371 |
|
|
|
378 |
Topics: {{topics}}
|
379 |
Entity Information: {{entities}}
|
380 |
{instruction_prompt}
|
381 |
+
Provide a concise and relevant answer to the question.
|
|
|
|
|
382 |
"""
|
383 |
|
384 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
385 |
formatted_prompt = prompt_val.format(
|
386 |
context=context_str,
|
387 |
+
conv_context=chatbot.get_context()[:1000], # Limit conversation context
|
388 |
+
question=question,
|
389 |
+
topics=", ".join(topics[:5]), # Limit number of topics
|
390 |
+
entities=json.dumps({k: v[:3] for k, v in serializable_entity_tracker.items()}) # Limit number of entities
|
391 |
)
|
392 |
|
393 |
full_response = generate_chunked_response(model, formatted_prompt)
|
|
|
424 |
words = context_str.split()
|
425 |
context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
|
426 |
|
427 |
+
context_str = context_str[:max_context_chars]
|
428 |
+
|
429 |
prompt_template = """
|
430 |
Answer the question based on the following context from the PDF document:
|
431 |
Context:
|
432 |
{context}
|
433 |
Question: {question}
|
|
|
434 |
Provide a summarized and direct answer to the question.
|
435 |
"""
|
436 |
|
|
|
452 |
def extract_answer(full_response, instructions=None):
|
453 |
# First, try to split the response at common instruction phrases
|
454 |
answer_patterns = [
|
455 |
+
r"If the web search results don't contain relevant information, state that the information is not available in the search results\.",
|
456 |
+
r"Provide a response that addresses the question and follows the user's instructions\.",
|
457 |
+
r"Do not mention these instructions or the web search process in your answer\.",
|
458 |
r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
|
459 |
r"Provide a concise and direct answer to the question:",
|
460 |
r"Answer:",
|