Update app.py
app.py
CHANGED
@@ -22,7 +22,12 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.llms import HuggingFaceHub
 from langchain_core.documents import Document
 from sentence_transformers import SentenceTransformer
-from llama_parse import
+from llama_parse import
+from llama_cpp import Llama
+from llama_cpp_agent.llm_agent import LlamaCppAgent
+from llama_cpp_agent.messages_formatter import MessagesFormatterType
+from llama_cpp_agent.providers.llama_cpp_endpoint_provider import LlamaCppEndpointSettings
+

 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
@@ -378,10 +383,25 @@ def prepare_context(query: str, documents: List[Document], max_tokens: int) -> str:

     return truncate_text(context, max_tokens)

+# Initialize LlamaCppAgent
+def initialize_llama_cpp_agent():
+    main_model = LlamaCppEndpointSettings(
+        completions_endpoint_url="http://127.0.0.1:8080/completion"
+    )
+    llama_cpp_agent = LlamaCppAgent(
+        main_model,
+        debug_output=False,
+        system_prompt="You are an AI assistant designed to help with RAG tasks.",
+        predefined_messages_formatter_type=MessagesFormatterType.CHATML
+    )
+    return llama_cpp_agent
+
+# Modify the ask_question function to use LlamaCppAgent
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
         return "Please enter a question."

+    llama_cpp_agent = initialize_llama_cpp_agent()
     model = get_model(temperature, top_p, repetition_penalty)

     # Update the chatbot's model
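For context on this hunk: `LlamaCppEndpointSettings` points the agent at a running llama.cpp HTTP server rather than loading a model in-process, so something must already be serving `/completion` on port 8080 for `get_chat_response` to succeed. Below is a minimal, self-contained sketch of the same wiring used on its own; the test question is made up, everything else reuses the names and arguments introduced in the hunk above.

# Standalone sketch of the agent setup added in this commit.
# Assumes a llama.cpp server is already listening on http://127.0.0.1:8080/completion;
# if it is not, get_chat_response raises a connection error.
from llama_cpp_agent.llm_agent import LlamaCppAgent
from llama_cpp_agent.messages_formatter import MessagesFormatterType
from llama_cpp_agent.providers.llama_cpp_endpoint_provider import LlamaCppEndpointSettings

def initialize_llama_cpp_agent():
    main_model = LlamaCppEndpointSettings(
        completions_endpoint_url="http://127.0.0.1:8080/completion"
    )
    return LlamaCppAgent(
        main_model,
        debug_output=False,
        system_prompt="You are an AI assistant designed to help with RAG tasks.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    )

if __name__ == "__main__":
    agent = initialize_llama_cpp_agent()
    # Same call signature that ask_question uses further down in the diff.
    print(agent.get_chat_response("What is retrieval-augmented generation?", temperature=0.7))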
@@ -395,17 +415,14 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     database = None

     max_attempts = 3
-    max_input_tokens = 20000
+    max_input_tokens = 20000
     max_output_tokens = 800

     if web_search:
         contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)

-        print(f"Contextualized question: {contextualized_question}")
-        print(f"User Instructions: {user_instructions}")
-
         try:
-            search_results = google_search(contextualized_question, num_results=5)
+            search_results = google_search(contextualized_question, num_results=5)
         except Exception as e:
             print(f"Error in web search: {e}")
             return f"I apologize, but I encountered an error while searching for information: {str(e)}"
@@ -426,8 +443,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):

         database.save_local("faiss_database")

-
-        context_str = prepare_context(contextualized_question, web_docs, max_input_tokens // 2)  # Use half of max_input_tokens for context
+        context_str = prepare_context(contextualized_question, web_docs, max_input_tokens // 2)

         instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""

@@ -443,13 +459,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
         Provide a concise and relevant answer to the question.
         """

-
-
-
-        current_topics = topics[:5]  # Limit to top 5 topics
-        current_entities = {k: list(v)[:3] for k, v in entity_tracker.items()}  # Limit to top 3 entities per type
+        current_conv_context = truncate_text(chatbot.get_context(), max_input_tokens // 4)
+        current_topics = topics[:5]
+        current_entities = {k: list(v)[:3] for k, v in entity_tracker.items()}

-        formatted_prompt = prompt_val.format(
+        formatted_prompt = prompt_template.format(
            context=context_str,
            conv_context=current_conv_context,
            question=question,
@@ -461,12 +475,17 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
             formatted_prompt = truncate_text(formatted_prompt, max_input_tokens)

             try:
-
+                # Use LlamaCppAgent for initial response generation
+                initial_response = llama_cpp_agent.get_chat_response(formatted_prompt, temperature=temperature)
+
+                # Use generate_chunked_response for further refinement if needed
+                full_response = generate_chunked_response(model, initial_response, max_tokens=max_output_tokens)
+
                 answer = extract_answer(full_response, user_instructions)
                 all_answers.append(answer)
                 break
             except Exception as e:
-                print(f"Error in
+                print(f"Error in response generation: {e}")
                 if attempt == max_attempts - 1:
                     all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")

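The hunk above routes the prompt through `llama_cpp_agent.get_chat_response` for a first draft and then hands that draft to `generate_chunked_response` for refinement, retrying until `max_attempts` is exhausted. The sketch below shows that retry pattern in isolation; `answer_with_retries` and the `refine` callable are hypothetical stand-ins (in app.py the refinement step is `generate_chunked_response`, which is not shown in this diff).

# Hypothetical helper illustrating the retry loop implied by max_attempts / attempt.
def answer_with_retries(agent, refine, formatted_prompt, max_attempts=3, temperature=0.7):
    for attempt in range(max_attempts):
        try:
            # Stage 1: draft answer from the llama.cpp endpoint.
            initial_response = agent.get_chat_response(formatted_prompt, temperature=temperature)
            # Stage 2: refinement pass (generate_chunked_response in app.py).
            return refine(initial_response)
        except Exception as e:
            print(f"Error in response generation (attempt {attempt + 1}): {e}")
    return "I apologize, but I encountered an error while generating the response."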
@@ -490,11 +509,10 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
         if database is None:
             return "No documents available. Please upload PDF documents to answer questions."

-        retriever = database.as_retriever(search_kwargs={"k":
+        retriever = database.as_retriever(search_kwargs={"k": 5})
         relevant_docs = retriever.get_relevant_documents(question)

-
-        context_str = prepare_context(question, relevant_docs, max_input_tokens // 2)  # Use half of max_input_tokens for context
+        context_str = prepare_context(question, relevant_docs, max_input_tokens // 2)

         instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""

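The retriever in this hunk reads from the FAISS index that an earlier hunk persists with `database.save_local("faiss_database")`. A short sketch of that save/load round trip, assuming `HuggingFaceEmbeddings` from the existing imports; the embedding model name and the example document are placeholders, and `allow_dangerous_deserialization` is required by newer langchain_community releases when reloading a local index.

# Sketch of the FAISS persistence round trip behind save_local / as_retriever.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # assumed model
docs = [Document(page_content="Example passage to index.", metadata={"source": "web"})]

database = FAISS.from_documents(docs, embeddings)
database.save_local("faiss_database")

# On a later request the index is reloaded and queried, as in the hunk above.
database = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
retriever = database.as_retriever(search_kwargs={"k": 5})
relevant_docs = retriever.get_relevant_documents("example question")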
@@ -507,18 +525,22 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
         Provide a summarized and direct answer to the question.
         """

-
-        formatted_prompt = prompt_val.format(context=context_str, question=question)
+        formatted_prompt = prompt_template.format(context=context_str, question=question)

         if estimate_tokens(formatted_prompt) > max_input_tokens:
             formatted_prompt = truncate_text(formatted_prompt, max_input_tokens)

         try:
-
+            # Use LlamaCppAgent for initial response generation
+            initial_response = llama_cpp_agent.get_chat_response(formatted_prompt, temperature=temperature)
+
+            # Use generate_chunked_response for further refinement if needed
+            full_response = generate_chunked_response(model, initial_response, max_tokens=max_output_tokens)
+
             answer = extract_answer(full_response, user_instructions)
             return answer
         except Exception as e:
-            print(f"Error in
+            print(f"Error in response generation: {e}")
             if attempt == max_attempts - 1:
                 return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."

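`prompt_template` itself is defined elsewhere in app.py and is not part of this diff. For illustration only, a template compatible with the `.format(context=..., question=...)` call in the hunk above might look roughly like this; the wording is an assumption, apart from the closing instruction quoted in the hunk.

# Illustrative template only; the real prompt_template in app.py may differ.
prompt_template = """Using the following context extracted from the documents:
{context}

Answer this question: {question}

Provide a summarized and direct answer to the question.
"""

formatted_prompt = prompt_template.format(
    context="(retrieved passages would go here)",
    question="(the user's question would go here)",
)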
@@ -591,13 +613,14 @@ with gr.Blocks() as demo:

     enhanced_context_driven_chatbot = EnhancedContextDrivenChatbot()

+    # Update the chat function to use the modified ask_question function
     def chat(question, history, temperature, top_p, repetition_penalty, web_search, user_instructions):
         answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, enhanced_context_driven_chatbot, user_instructions)
         history.append((question, answer))
         return "", history

     submit_button.click(chat, inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox, instructions_input], outputs=[question_input, chatbot])
-
+
     clear_button = gr.Button("Clear Cache")
     clear_output = gr.Textbox(label="Cache Status")
     clear_button.click(clear_cache, inputs=[], outputs=clear_output)
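The `submit_button.click(...)` wiring refers to widgets (`question_input`, `chatbot`, the three sliders, `web_search_checkbox`, `instructions_input`) that are declared earlier in app.py, outside this diff. A rough sketch of how such widgets are typically declared inside the same `gr.Blocks` context follows; the labels, ranges, and default values are assumptions, only the variable names come from the diff.

# Hypothetical declarations for the components referenced by submit_button.click above.
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Conversation")
    question_input = gr.Textbox(label="Ask a question")
    instructions_input = gr.Textbox(label="Optional instructions for the assistant")
    temperature_slider = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
    top_p_slider = gr.Slider(0.0, 1.0, value=0.9, label="Top-p")
    repetition_penalty_slider = gr.Slider(1.0, 2.0, value=1.1, label="Repetition penalty")
    web_search_checkbox = gr.Checkbox(label="Enable web search")
    submit_button = gr.Button("Submit")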