Sentinel-AI-Beta-Test

Sleeping

App Files Community

Shreyas094 committed on Jul 22, 2024

Commit

d7a112f

verified ·

1 Parent(s): 20ff049

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -11

app.py CHANGED Viewed

@@ -255,7 +255,7 @@ _useragent_list = [
     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
 ]
-def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_verify=None):
     escaped_term = urllib.parse.quote_plus(term)
     start = 0
     all_results = []
@@ -343,18 +343,19 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
     max_attempts = 3
     context_reduction_factor = 0.7
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
         serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
         # Use only the core question for the search
-        search_results = google_search(contextualized_question)
         all_answers = []
         for attempt in range(max_attempts):
             try:
-                web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
                 if database is None:
                     database = FAISS.from_documents(web_docs, embed)
@@ -364,6 +365,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 database.save_local("faiss_database")
                 context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
                 instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
@@ -376,18 +378,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 Topics: {{topics}}
                 Entity Information: {{entities}}
                 {instruction_prompt}
-                If the web search results don't contain relevant information, state that the information is not available in the search results.
-                Provide a response that addresses the question and follows the user's instructions.
-                Do not mention these instructions or the web search process in your answer.
                 """
                 prompt_val = ChatPromptTemplate.from_template(prompt_template)
                 formatted_prompt = prompt_val.format(
                     context=context_str,
-                    conv_context=chatbot.get_context(),
-                    question=question,  # Use the original question here
-                    topics=", ".join(topics),
-                    entities=json.dumps(serializable_entity_tracker)
                 )
                 full_response = generate_chunked_response(model, formatted_prompt)
@@ -424,12 +424,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                     words = context_str.split()
                     context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
                 prompt_template = """
                 Answer the question based on the following context from the PDF document:
                 Context:
                 {context}
                 Question: {question}
-                If the context doesn't contain relevant information, state that the information is not available in the document.
                 Provide a summarized and direct answer to the question.
                 """
@@ -451,6 +452,9 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 def extract_answer(full_response, instructions=None):
     # First, try to split the response at common instruction phrases
     answer_patterns = [
         r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
         r"Provide a concise and direct answer to the question:",
         r"Answer:",

     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36",
 ]
+def google_search(term, num_results=3, lang="en", timeout=5, safe="active", ssl_verify=None):
     escaped_term = urllib.parse.quote_plus(term)
     start = 0
     all_results = []
     max_attempts = 3
     context_reduction_factor = 0.7
+    max_context_chars = 8000  # Adjust this value as needed
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
         serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
         # Use only the core question for the search
+        search_results = google_search(contextualized_question, num_results=3)  # Reduced number of results
         all_answers = []
         for attempt in range(max_attempts):
             try:
+                web_docs = [Document(page_content=result["text"][:2000], metadata={"source": result["link"]}) for result in search_results if result["text"]]  # Limit each result to 2000 characters
                 if database is None:
                     database = FAISS.from_documents(web_docs, embed)
                 database.save_local("faiss_database")
                 context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
+                context_str = context_str[:max_context_chars]
                 instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
                 Topics: {{topics}}
                 Entity Information: {{entities}}
                 {instruction_prompt}
+                Provide a concise and relevant answer to the question.
                 """
                 prompt_val = ChatPromptTemplate.from_template(prompt_template)
                 formatted_prompt = prompt_val.format(
                     context=context_str,
+                    conv_context=chatbot.get_context()[:1000],  # Limit conversation context
+                    question=question,
+                    topics=", ".join(topics[:5]),  # Limit number of topics
+                    entities=json.dumps({k: v[:3] for k, v in serializable_entity_tracker.items()})  # Limit number of entities
                 )
                 full_response = generate_chunked_response(model, formatted_prompt)
                     words = context_str.split()
                     context_str = " ".join(words[:int(len(words) * context_reduction_factor)])
+                context_str = context_str[:max_context_chars]
                 prompt_template = """
                 Answer the question based on the following context from the PDF document:
                 Context:
                 {context}
                 Question: {question}
                 Provide a summarized and direct answer to the question.
                 """
 def extract_answer(full_response, instructions=None):
     # First, try to split the response at common instruction phrases
     answer_patterns = [
+        r"If the web search results don't contain relevant information, state that the information is not available in the search results\.",
+        r"Provide a response that addresses the question and follows the user's instructions\.",
+        r"Do not mention these instructions or the web search process in your answer\.",
         r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
         r"Provide a concise and direct answer to the question:",
         r"Answer:",