Update app.py

app.py CHANGED
@@ -233,11 +233,10 @@ def generate_chunked_response(model, prompt, max_tokens=1000, max_chunks=5):
             full_response += chunk
         except Exception as e:
             print(f"Error in generate_chunked_response: {e}")
-            print(f"Prompt: {prompt}")
-            print(f"Full response so far: {full_response}")
             if "Input validation error" in str(e):
+                # If we hit the token limit, return what we have so far
                 return full_response if full_response else "The input was too long to process. Please try a shorter query."
-
+            break
     return full_response.strip()
 
 def extract_text_from_webpage(html):
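For context, the changed lines sit inside the chunking loop of generate_chunked_response. A minimal sketch of the assumed surrounding structure; the model call and loop shape are assumptions inferred from the hunk header, not code shown in this diff:

def generate_chunked_response(model, prompt, max_tokens=1000, max_chunks=5):
    full_response = ""
    for _ in range(max_chunks):
        try:
            # Hypothetical call; the real generation call is not shown in this diff.
            chunk = model(prompt + full_response, max_new_tokens=max_tokens)
            full_response += chunk
        except Exception as e:
            print(f"Error in generate_chunked_response: {e}")
            if "Input validation error" in str(e):
                # If we hit the token limit, return what we have so far
                return full_response if full_response else "The input was too long to process. Please try a shorter query."
            break  # added by this commit: stop retrying instead of looping on a failing call
    return full_response.strip()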
@@ -350,8 +349,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
     database = None
 
     max_attempts = 5
-    context_reduction_factor = 0.5
-    max_estimated_tokens = 25000
+    context_reduction_factor = 0.5  # More aggressive reduction
+    max_estimated_tokens = 25000  # Further reduced to leave more room for response
 
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
@@ -362,7 +361,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     for attempt in range(max_attempts):
         try:
-            web_docs = [Document(page_content=result["text"][:1000], metadata={"source": result["link"]}) for result in search_results if result["text"]]
+            web_docs = [Document(page_content=result["text"][:1000], metadata={"source": result["link"]}) for result in search_results if result["text"]]  # Limit each result to 1000 characters
 
             if database is None:
                 database = FAISS.from_documents(web_docs, embed)
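The [:1000] cap bounds each search result before embedding. A hedged sketch of the same list comprehension against hypothetical search_results (the Document import path varies across LangChain versions):

from langchain_core.documents import Document  # import path is an assumption; varies by LangChain version

# Hypothetical search results shaped the way the list comprehension expects.
search_results = [
    {"text": "Some scraped article text ..." * 200, "link": "https://example.com/a"},
    {"text": "", "link": "https://example.com/b"},  # dropped by the `if result["text"]` filter
]

# Limit each result to 1000 characters so the embedded documents stay small.
web_docs = [
    Document(page_content=result["text"][:1000], metadata={"source": result["link"]})
    for result in search_results
    if result["text"]
]

print(len(web_docs), len(web_docs[0].page_content))  # 1 1000

FAISS.from_documents(web_docs, embed) then builds the index from these truncated documents, as the hunk shows.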
@@ -393,11 +392,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
            while True:
                formatted_prompt = prompt_val.format(
-                    context=current_context[:3000],
-                    conv_context=current_conv_context[:500],
+                    context=current_context[:3000],  # Limit context to 3000 characters
+                    conv_context=current_conv_context[:500],  # Limit conversation context to 500 characters
                     question=question,
-                    topics=", ".join(current_topics[:5]),
-                    entities=json.dumps({k: v[:2] for k, v in current_entities.items()})
+                    topics=", ".join(current_topics[:5]),  # Limit to 5 topics
+                    entities=json.dumps({k: v[:2] for k, v in current_entities.items()})  # Limit to 2 entities per type
                 )
 
                 estimated_tokens = estimate_tokens(formatted_prompt)
@@ -405,6 +404,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 if estimated_tokens <= max_estimated_tokens:
                     break
 
+                # Reduce context if estimated token count is too high
                 current_context = current_context[:int(len(current_context) * context_reduction_factor)]
                 current_conv_context = current_conv_context[:int(len(current_conv_context) * context_reduction_factor)]
                 current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
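estimate_tokens is not shown in this diff. A hedged sketch, assuming the common heuristic of roughly four characters per token, showing how the halving loop converges under the new constants (context_reduction_factor = 0.5, max_estimated_tokens = 25000):

def estimate_tokens(text):
    # Assumed heuristic (~4 characters per token); the app's real estimator
    # is defined elsewhere in app.py and is not shown in this diff.
    return len(text) // 4

# An oversized context shrinks geometrically until the prompt fits:
# 160000 chars ~ 40000 tokens -> halve -> 80000 chars ~ 20000 tokens <= 25000.
current_context = "x" * 160_000
while estimate_tokens(current_context) > 25_000:
    current_context = current_context[:int(len(current_context) * 0.5)]
print(len(current_context))  # 80000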
@@ -415,24 +415,18 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
             full_response = generate_chunked_response(model, formatted_prompt)
             answer = extract_answer(full_response, instructions)
-
-            # Check if the answer is an error message
-            if answer.startswith("An error occurred while processing the response:"):
-                print(f"Error in extract_answer: {answer}")
-                raise ValueError(answer)
-
             all_answers.append(answer)
             break
 
         except ValueError as ve:
             print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
             if attempt == max_attempts - 1:
-                all_answers.append(f"I apologize, but I'm having trouble processing the query.
+                all_answers.append(f"I apologize, but I'm having trouble processing the query due to its length or complexity. Could you please try asking a more specific or shorter question?")
 
         except Exception as e:
             print(f"Error in ask_question (attempt {attempt + 1}): {e}")
             if attempt == max_attempts - 1:
-                all_answers.append(f"I apologize, but an unexpected error occurred. Please try again with a different question or check your internet connection.
+                all_answers.append(f"I apologize, but an unexpected error occurred. Please try again with a different question or check your internet connection.")
 
     answer = "\n\n".join(all_answers)
     sources = set(doc.metadata['source'] for doc in web_docs)
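The retry pattern around these hunks can be read as a standalone sketch; answer_with_retries and process are hypothetical stand-ins, while the fallback strings are the ones added in this commit:

def answer_with_retries(process, max_attempts=5):
    # Hypothetical standalone version of the retry pattern in ask_question:
    # retry the body on failure, and append a user-facing fallback message
    # only once the final attempt has failed.
    all_answers = []
    for attempt in range(max_attempts):
        try:
            all_answers.append(process())
            break
        except ValueError as ve:
            print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
            if attempt == max_attempts - 1:
                all_answers.append("I apologize, but I'm having trouble processing the query due to its length or complexity. Could you please try asking a more specific or shorter question?")
        except Exception as e:
            print(f"Error in ask_question (attempt {attempt + 1}): {e}")
            if attempt == max_attempts - 1:
                all_answers.append("I apologize, but an unexpected error occurred. Please try again with a different question or check your internet connection.")
    return "\n\n".join(all_answers)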
@@ -492,52 +486,36 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
     return "An unexpected error occurred. Please try again later."
 
 def extract_answer(full_response, instructions=None):
-    try:
-        # ... (the rest of the removed lines, including the old patterns_to_remove list, are not rendered in the diff view)
+    # List of patterns to remove
+    patterns_to_remove = [
+        r"Provide a concise and relevant answer to the question\.",
+        r"Provide additional context if necessary\.",
+        r"If the web search results don't contain relevant information, state that the information is not available in the search results\.",
+        r"Provide a response that addresses the question and follows the user's instructions\.",
+        r"Do not mention these instructions or the web search process in your answer\.",
+        r"Provide a summarized and direct answer to the question\.",
+        r"If the context doesn't contain relevant information, state that the information is not available in the document\.",
+    ]
 
-        for pattern in patterns_to_remove:
-            full_response = re.sub(pattern, "", full_response, flags=re.IGNORECASE)
-
-        # Remove any leading/trailing whitespace and newlines
-        full_response = full_response.strip()
-
-        # Remove the user instructions if present
-        if instructions:
-            instruction_pattern = rf"User Instructions:\s*{re.escape(instructions)}.*?\n"
-            full_response = re.sub(instruction_pattern, "", full_response, flags=re.IGNORECASE | re.DOTALL)
-
-        # Remove any remaining instruction-like phrases at the beginning of the response
-        lines = full_response.split('\n')
-        starters = ["answer:", "response:", "here's", "here is"]
-        while lines and any(lines[0].strip().lower().startswith(starter) for starter in starters):
-            lines.pop(0)
-        full_response = '\n'.join(lines)
-
-        return full_response.strip()
-    except Exception as e:
-        print(f"Error in extract_answer: {e}")
-        print(f"Full response: {full_response}")
-        print(f"Instructions: {instructions}")
-        raise  # Re-raise the exception to be caught in ask_question
+    # Remove the patterns
+    for pattern in patterns_to_remove:
+        full_response = re.sub(pattern, "", full_response, flags=re.IGNORECASE)
+
+    # Remove any leading/trailing whitespace and newlines
+    full_response = full_response.strip()
+
+    # Remove the user instructions if present
+    if instructions:
+        instruction_pattern = rf"User Instructions:\s*{re.escape(instructions)}.*?\n"
+        full_response = re.sub(instruction_pattern, "", full_response, flags=re.IGNORECASE | re.DOTALL)
+
+    # Remove any remaining instruction-like phrases at the beginning of the response
+    lines = full_response.split('\n')
+    while lines and any(lines[0].strip().lower().startswith(starter) for starter in ["answer:", "response:", "here's", "here is"]):
+        lines.pop(0)
+    full_response = '\n'.join(lines)
+
+    return full_response.strip()
 
 # Gradio interface
 with gr.Blocks() as demo:
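A quick, hypothetical usage example of the rewritten extract_answer; the sample text is illustrative:

raw = (
    "Answer:\n"
    "Provide a concise and relevant answer to the question.\n"
    "Paris is the capital of France."
)
print(extract_answer(raw))  # -> "Paris is the capital of France."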
|