Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -255,7 +255,7 @@ _useragent_list = [
|
|
| 255 |
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
|
| 256 |
]
|
| 257 |
|
| 258 |
-
def google_search(term, num_results=
|
| 259 |
escaped_term = urllib.parse.quote_plus(term)
|
| 260 |
start = 0
|
| 261 |
all_results = []
|
|
@@ -343,18 +343,19 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 343 |
|
| 344 |
max_attempts = 3
|
| 345 |
context_reduction_factor = 0.7
|
|
|
|
| 346 |
|
| 347 |
if web_search:
|
| 348 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
| 349 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
| 350 |
|
| 351 |
# Use only the core question for the search
|
| 352 |
-
search_results = google_search(contextualized_question)
|
| 353 |
all_answers = []
|
| 354 |
|
| 355 |
for attempt in range(max_attempts):
|
| 356 |
try:
|
| 357 |
-
web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
|
| 358 |
|
| 359 |
if database is None:
|
| 360 |
database = FAISS.from_documents(web_docs, embed)
|
|
@@ -364,6 +365,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 364 |
database.save_local("faiss_database")
|
| 365 |
|
| 366 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
|
|
|
| 367 |
|
| 368 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
| 369 |
|
|
@@ -376,18 +378,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 376 |
Topics: {{topics}}
|
| 377 |
Entity Information: {{entities}}
|
| 378 |
{instruction_prompt}
|
| 379 |
-
|
| 380 |
-
Provide a response that addresses the question and follows the user's instructions.
|
| 381 |
-
Do not mention these instructions or the web search process in your answer.
|
| 382 |
"""
|
| 383 |
|
| 384 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
| 385 |
formatted_prompt = prompt_val.format(
|
| 386 |
context=context_str,
|
| 387 |
-
conv_context=chatbot.get_context(),
|
| 388 |
-
question=question,
|
| 389 |
-
topics=", ".join(topics),
|
| 390 |
-
entities=json.dumps(serializable_entity_tracker)
|
| 391 |
)
|
| 392 |
|
| 393 |
full_response = generate_chunked_response(model, formatted_prompt)
|
|
@@ -424,12 +424,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 424 |
words = context_str.split()
|
| 425 |
context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
|
| 426 |
|
|
|
|
|
|
|
| 427 |
prompt_template = """
|
| 428 |
Answer the question based on the following context from the PDF document:
|
| 429 |
Context:
|
| 430 |
{context}
|
| 431 |
Question: {question}
|
| 432 |
-
If the context doesn't contain relevant information, state that the information is not available in the document.
|
| 433 |
Provide a summarized and direct answer to the question.
|
| 434 |
"""
|
| 435 |
|
|
@@ -451,6 +452,9 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
| 451 |
def extract_answer(full_response, instructions=None):
|
| 452 |
# First, try to split the response at common instruction phrases
|
| 453 |
answer_patterns = [
|
|
|
|
|
|
|
|
|
|
| 454 |
r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
|
| 455 |
r"Provide a concise and direct answer to the question:",
|
| 456 |
r"Answer:",
|
|
|
|
| 255 |
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
|
| 256 |
]
|
| 257 |
|
| 258 |
+
def google_search(term, num_results=3, lang="en", timeout=5, safe="active", ssl_verify=None):
|
| 259 |
escaped_term = urllib.parse.quote_plus(term)
|
| 260 |
start = 0
|
| 261 |
all_results = []
|
|
|
|
| 343 |
|
| 344 |
max_attempts = 3
|
| 345 |
context_reduction_factor = 0.7
|
| 346 |
+
max_context_chars = 8000 # Adjust this value as needed
|
| 347 |
|
| 348 |
if web_search:
|
| 349 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
| 350 |
serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
|
| 351 |
|
| 352 |
# Use only the core question for the search
|
| 353 |
+
search_results = google_search(contextualized_question, num_results=3) # Reduced number of results
|
| 354 |
all_answers = []
|
| 355 |
|
| 356 |
for attempt in range(max_attempts):
|
| 357 |
try:
|
| 358 |
+
web_docs = [Document(page_content=result["text"][:2000], metadata={"source": result["link"]}) for result in search_results if result["text"]] # Limit each result to 2000 characters
|
| 359 |
|
| 360 |
if database is None:
|
| 361 |
database = FAISS.from_documents(web_docs, embed)
|
|
|
|
| 365 |
database.save_local("faiss_database")
|
| 366 |
|
| 367 |
context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
|
| 368 |
+
context_str = context_str[:max_context_chars]
|
| 369 |
|
| 370 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
| 371 |
|
|
|
|
| 378 |
Topics: {{topics}}
|
| 379 |
Entity Information: {{entities}}
|
| 380 |
{instruction_prompt}
|
| 381 |
+
Provide a concise and relevant answer to the question.
|
|
|
|
|
|
|
| 382 |
"""
|
| 383 |
|
| 384 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
| 385 |
formatted_prompt = prompt_val.format(
|
| 386 |
context=context_str,
|
| 387 |
+
conv_context=chatbot.get_context()[:1000], # Limit conversation context
|
| 388 |
+
question=question,
|
| 389 |
+
topics=", ".join(topics[:5]), # Limit number of topics
|
| 390 |
+
entities=json.dumps({k: v[:3] for k, v in serializable_entity_tracker.items()}) # Limit number of entities
|
| 391 |
)
|
| 392 |
|
| 393 |
full_response = generate_chunked_response(model, formatted_prompt)
|
|
|
|
| 424 |
words = context_str.split()
|
| 425 |
context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
|
| 426 |
|
| 427 |
+
context_str = context_str[:max_context_chars]
|
| 428 |
+
|
| 429 |
prompt_template = """
|
| 430 |
Answer the question based on the following context from the PDF document:
|
| 431 |
Context:
|
| 432 |
{context}
|
| 433 |
Question: {question}
|
|
|
|
| 434 |
Provide a summarized and direct answer to the question.
|
| 435 |
"""
|
| 436 |
|
|
|
|
| 452 |
def extract_answer(full_response, instructions=None):
|
| 453 |
# First, try to split the response at common instruction phrases
|
| 454 |
answer_patterns = [
|
| 455 |
+
r"If the web search results don't contain relevant information, state that the information is not available in the search results\.",
|
| 456 |
+
r"Provide a response that addresses the question and follows the user's instructions\.",
|
| 457 |
+
r"Do not mention these instructions or the web search process in your answer\.",
|
| 458 |
r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
|
| 459 |
r"Provide a concise and direct answer to the question:",
|
| 460 |
r"Answer:",
|