Update app.py
app.py CHANGED
@@ -352,7 +352,7 @@ def estimate_tokens(text):
     # Rough estimate: 1 token ~= 4 characters
     return len(text) // 4
 
-def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot):
+def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
         return "Please enter a question."
 
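A note before the next hunk: estimate_tokens (shown as context above) relies on the rough 4-characters-per-token rule. A minimal sketch, with a hypothetical oversized prompt, of how that estimate interacts with the 32000-token budget and the 1000-token reply margin introduced later in this diff:

def estimate_tokens(text):
    # Rough estimate: 1 token ~= 4 characters, as in app.py
    return len(text) // 4

max_tokens = 32000                  # budget set inside ask_question below
prompt = "x" * 130000               # hypothetical oversized prompt (32500 estimated tokens)
print(estimate_tokens(prompt) <= max_tokens - 1000)  # False: exceeds the 31000-token ceiling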
@@ -368,16 +368,15 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
     else:
         database = None
 
-    max_attempts = 3
+    max_attempts = 3
     context_reduction_factor = 0.7
-    max_tokens = 32000
+    max_tokens = 32000
 
     if web_search:
-        contextualized_question, topics, entity_tracker,
+        contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
 
-        # Log the contextualized question and instructions separately for debugging
         print(f"Contextualized question: {contextualized_question}")
-        print(f"Instructions: {
+        print(f"User Instructions: {user_instructions}")
 
         try:
             search_results = google_search(contextualized_question, num_results=3)
@@ -403,7 +402,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
         context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
 
-        instruction_prompt = f"User Instructions: {
+        instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""
 
         prompt_template = f"""
         Answer the question based on the following web search results, conversation context, entity information, and user instructions:
@@ -419,7 +418,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
         prompt_val = ChatPromptTemplate.from_template(prompt_template)
 
-        # Start with full context and progressively reduce if necessary
         current_context = context_str
         current_conv_context = chatbot.get_context()
         current_topics = topics
@@ -434,13 +432,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 entities=json.dumps(current_entities)
             )
 
-            # Estimate token count (rough estimate)
             estimated_tokens = len(formatted_prompt) // 4
 
-            if estimated_tokens <= max_tokens - 1000:
+            if estimated_tokens <= max_tokens - 1000:
                 break
 
-            # Reduce context if estimated token count is too high
             current_context = current_context[:int(len(current_context) * context_reduction_factor)]
             current_conv_context = current_conv_context[:int(len(current_conv_context) * context_reduction_factor)]
             current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
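The hunk above is the core of a progressive context-reduction loop: every prompt component is truncated to 70% of its length until the token estimate leaves a 1000-token margin. A standalone sketch of the pattern (the function name and simplified prompt are placeholders, not the app's real template):

def shrink_until_fits(context, question, max_tokens=32000, factor=0.7, max_attempts=3):
    # Trim the context until the estimated prompt size fits the budget,
    # mirroring the reduction loop in the diff above.
    for _ in range(max_attempts):
        prompt = f"Context: {context}\nQuestion: {question}"
        if len(prompt) // 4 <= max_tokens - 1000:
            return prompt
        context = context[:int(len(context) * factor)]
    raise ValueError("Context reduced too much. Unable to process the query.")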
@@ -450,7 +446,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             raise ValueError("Context reduced too much. Unable to process the query.")
 
         full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
-        answer = extract_answer(full_response,
+        answer = extract_answer(full_response, user_instructions)
         all_answers.append(answer)
         break
 
@@ -469,12 +465,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
         sources_section = "\n\nSources:\n" + "\n".join(f"- {source}" for source in sources)
         answer += sources_section
 
-        # Update chatbot context with the answer
         chatbot.add_to_history(answer)
 
         return answer
 
-
+    else:  # PDF document chat
         for attempt in range(max_attempts):
             try:
                 if database is None:
@@ -484,11 +479,14 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 relevant_docs = retriever.get_relevant_documents(question)
                 context_str = "\n".join([doc.page_content for doc in relevant_docs])
 
-                prompt_template = """
+                instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""
+
+                prompt_template = f"""
                 Answer the question based on the following context from the PDF document:
                 Context:
-                {context}
-                Question: {question}
+                {{context}}
+                Question: {{question}}
+                {instruction_prompt}
                 Provide a summarized and direct answer to the question.
                 """
 
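The brace doubling in the hunk above is the point of the change: once prompt_template becomes an f-string, {instruction_prompt} is interpolated immediately, while {{context}} and {{question}} are escaped down to single braces and survive as template variables. A sketch of the mechanism, assuming a langchain-core style import (the app's own import path may differ) and a stand-in instruction string:

from langchain_core.prompts import ChatPromptTemplate

instruction_prompt = "User Instructions: answer in one sentence.\n"  # stand-in value
# {{context}} and {{question}} are escaped by the f-string and come out as
# {context} and {question}; {instruction_prompt} is substituted right away.
prompt_template = f"""
Context: {{context}}
Question: {{question}}
{instruction_prompt}
"""
prompt_val = ChatPromptTemplate.from_template(prompt_template)
print(prompt_val.format(context="some context", question="some question"))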
@@ -498,17 +496,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
                 estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <= max_tokens - 1000:
+                if estimated_tokens <= max_tokens - 1000:
                     break
 
-                # Reduce context if estimated token count is too high
                 context_str = context_str[:int(len(context_str) * context_reduction_factor)]
 
                 if len(context_str) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
                 full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
-                answer = extract_answer(full_response)
+                answer = extract_answer(full_response, user_instructions)
 
                 return answer
 
@@ -524,6 +521,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     return "An unexpected error occurred. Please try again later."
 
+
 def extract_answer(full_response, instructions=None):
     answer_patterns = [
         r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
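Context for this hunk: extract_answer walks a list of regex patterns like the one shown and strips any instruction boilerplate the model echoes back before its actual answer. A minimal sketch of that idea (a single illustrative pattern, not the app's full list):

import re

def strip_boilerplate(full_response):
    # Keep only what follows a known instruction phrase, if present.
    patterns = [
        r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
    ]
    for pattern in patterns:
        match = re.search(pattern, full_response)
        if match:
            return full_response[match.end():].strip()
    return full_response.strip()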
@@ -575,6 +573,7 @@ with gr.Blocks() as demo:
     with gr.Column(scale=2):
         chatbot = gr.Chatbot(label="Conversation")
         question_input = gr.Textbox(label="Ask a question")
+        instructions_input = gr.Textbox(label="Instructions for response (optional)", placeholder="Enter any specific instructions for the response here")
         submit_button = gr.Button("Submit")
     with gr.Column(scale=1):
         temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
@@ -584,12 +583,12 @@ with gr.Blocks() as demo:
 
     enhanced_context_driven_chatbot = EnhancedContextDrivenChatbot()
 
-    def chat(question, history, temperature, top_p, repetition_penalty, web_search):
-        answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, enhanced_context_driven_chatbot)
+    def chat(question, history, temperature, top_p, repetition_penalty, web_search, user_instructions):
+        answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, enhanced_context_driven_chatbot, user_instructions)
         history.append((question, answer))
         return "", history
 
-    submit_button.click(chat, inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox], outputs=[question_input, chatbot])
+    submit_button.click(chat, inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox, instructions_input], outputs=[question_input, chatbot])
 
     clear_button = gr.Button("Clear Cache")
     clear_output = gr.Textbox(label="Cache Status")
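The two UI hunks above wire the new instructions textbox through the event handler: components listed in submit_button.click(...) map positionally onto the chat callback's parameters, so instructions_input arrives as user_instructions. A reduced, self-contained sketch of that wiring (the echo-style callback stands in for ask_question):

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Conversation")
    question_input = gr.Textbox(label="Ask a question")
    instructions_input = gr.Textbox(label="Instructions for response (optional)")
    submit_button = gr.Button("Submit")

    def chat(question, history, user_instructions):
        # Inputs arrive in the same order as the components passed to click().
        answer = f"Echo: {question} (instructions: {user_instructions or 'none'})"
        history.append((question, answer))
        return "", history

    submit_button.click(
        chat,
        inputs=[question_input, chatbot, instructions_input],
        outputs=[question_input, chatbot],
    )

demo.launch()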