Update app.py
Browse files
app.py
CHANGED
@@ -712,51 +712,16 @@ def search_and_scrape(query, chat_history, num_results=5, max_chars=3000, time_r
|
|
712 |
logger.error(f"Unexpected error in search_and_scrape: {e}")
|
713 |
return f"An unexpected error occurred during the search and scrape process: {e}"
|
714 |
|
715 |
-
|
716 |
-
|
717 |
-
self.id = id
|
718 |
-
self.title = title
|
719 |
-
self.messages = []
|
720 |
-
|
721 |
-
def add_message(self, role, content):
|
722 |
-
self.messages.append({"role": role, "content": content})
|
723 |
-
|
724 |
-
class ConversationManager:
|
725 |
-
def __init__(self):
|
726 |
-
self.conversations = {}
|
727 |
-
self.current_conversation_id = None
|
728 |
-
|
729 |
-
def create_conversation(self, title):
|
730 |
-
conversation_id = str(random.randint(1000, 9999))
|
731 |
-
self.conversations[conversation_id] = Conversation(conversation_id, title)
|
732 |
-
self.current_conversation_id = conversation_id
|
733 |
-
return conversation_id
|
734 |
-
|
735 |
-
def get_conversation(self, conversation_id):
|
736 |
-
return self.conversations.get(conversation_id)
|
737 |
-
|
738 |
-
def get_conversation_list(self):
|
739 |
-
return [{"id": conv.id, "title": conv.title} for conv in self.conversations.values()]
|
740 |
-
|
741 |
-
conversation_manager = ConversationManager()
|
742 |
-
|
743 |
-
def chat_function(message, history, conversation_id, num_results, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature, model, use_pydf2):
|
744 |
-
if not conversation_id:
|
745 |
-
conversation_id = conversation_manager.create_conversation(message[:30] + "...")
|
746 |
-
|
747 |
-
conversation = conversation_manager.get_conversation(conversation_id)
|
748 |
-
if not conversation:
|
749 |
-
return "Error: Conversation not found", conversation_id, conversation_manager.get_conversation_list()
|
750 |
-
|
751 |
-
conversation.add_message("user", message)
|
752 |
-
|
753 |
-
chat_history = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation.messages])
|
754 |
|
755 |
query_type = determine_query_type(message, chat_history, client)
|
756 |
|
757 |
if query_type == "knowledge_base":
|
758 |
response = generate_ai_response(message, chat_history, client, model)
|
759 |
else: # web_search
|
|
|
|
|
760 |
response = search_and_scrape(
|
761 |
query=message,
|
762 |
chat_history=chat_history,
|
@@ -773,72 +738,44 @@ def chat_function(message, history, conversation_id, num_results, max_chars, tim
|
|
773 |
use_pydf2=use_pydf2
|
774 |
)
|
775 |
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
with gr.Row():
|
793 |
-
with gr.Column(scale=1):
|
794 |
-
conversation_list = gr.Dataframe(
|
795 |
-
headers=["id", "title"],
|
796 |
-
datatype=["str", "str"],
|
797 |
-
label="Conversations",
|
798 |
-
interactive=False
|
799 |
-
)
|
800 |
-
|
801 |
-
with gr.Column(scale=3):
|
802 |
-
chatbot = gr.Chatbot(height=500)
|
803 |
-
msg = gr.Textbox(label="Type your message here...")
|
804 |
-
send = gr.Button("Send")
|
805 |
-
|
806 |
-
with gr.Accordion("Advanced Parameters", open=False):
|
807 |
-
num_results = gr.Slider(5, 20, value=10, step=1, label="Number of initial results")
|
808 |
-
max_chars = gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve")
|
809 |
-
time_range = gr.Dropdown(["", "day", "week", "month", "year"], value="", label="Time Range")
|
810 |
-
language = gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="", label="Language")
|
811 |
-
category = gr.Dropdown(["", "general", "news", "images", "videos", "music", "files", "it", "science", "social media"], value="", label="Category")
|
812 |
-
engines = gr.Dropdown(
|
813 |
["google", "bing", "duckduckgo", "baidu", "yahoo", "qwant", "startpage"],
|
814 |
multiselect=True,
|
815 |
value=["google", "duckduckgo", "bing", "qwant"],
|
816 |
label="Engines"
|
817 |
-
)
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
|
827 |
-
|
828 |
-
|
829 |
-
|
830 |
-
|
831 |
-
|
832 |
-
|
833 |
-
[chatbot, conversation_id, conversation_list]
|
834 |
-
)
|
835 |
-
|
836 |
-
msg.submit(
|
837 |
-
bot,
|
838 |
-
[msg, chatbot, conversation_id, num_results, max_chars, time_range, language, category, engines, safesearch, method, llm_temperature, model, use_pydf2],
|
839 |
-
[chatbot, conversation_id, conversation_list]
|
840 |
)
|
|
|
841 |
|
842 |
-
|
843 |
-
|
844 |
-
iface.launch(share=True)
|
|
|
712 |
logger.error(f"Unexpected error in search_and_scrape: {e}")
|
713 |
return f"An unexpected error occurred during the search and scrape process: {e}"
|
714 |
|
715 |
+
def chat_function(message: str, history: List[Tuple[str, str]], num_results: int, max_chars: int, time_range: str, language: str, category: str, engines: List[str], safesearch: int, method: str, llm_temperature: float, model: str, use_pydf2: bool):
|
716 |
+
chat_history = "\n".join([f"{role}: {msg}" for role, msg in history])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
717 |
|
718 |
query_type = determine_query_type(message, chat_history, client)
|
719 |
|
720 |
if query_type == "knowledge_base":
|
721 |
response = generate_ai_response(message, chat_history, client, model)
|
722 |
else: # web_search
|
723 |
+
gr.Info("Initiating Web Search")
|
724 |
+
yield "Request you to sit back and relax until I scrape the web for up-to-date information"
|
725 |
response = search_and_scrape(
|
726 |
query=message,
|
727 |
chat_history=chat_history,
|
|
|
738 |
use_pydf2=use_pydf2
|
739 |
)
|
740 |
|
741 |
+
yield response
|
742 |
+
|
743 |
+
|
744 |
+
iface = gr.ChatInterface(
|
745 |
+
chat_function,
|
746 |
+
title="Web Scraper for Financial News with Sentinel AI",
|
747 |
+
description="Ask Sentinel any question. It will search the web for recent information or use its knowledge base as appropriate.",
|
748 |
+
theme=gr.Theme.from_hub("allenai/gradio-theme"),
|
749 |
+
additional_inputs=[
|
750 |
+
gr.Slider(5, 20, value=10, step=1, label="Number of initial results"),
|
751 |
+
gr.Slider(500, 10000, value=1500, step=100, label="Max characters to retrieve"),
|
752 |
+
gr.Dropdown(["", "day", "week", "month", "year"], value="", label="Time Range"),
|
753 |
+
gr.Dropdown(["", "all", "en", "fr", "de", "es", "it", "nl", "pt", "pl", "ru", "zh"], value="", label="Language"),
|
754 |
+
gr.Dropdown(["", "general", "news", "images", "videos", "music", "files", "it", "science", "social media"], value="", label="Category"),
|
755 |
+
gr.Dropdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
756 |
["google", "bing", "duckduckgo", "baidu", "yahoo", "qwant", "startpage"],
|
757 |
multiselect=True,
|
758 |
value=["google", "duckduckgo", "bing", "qwant"],
|
759 |
label="Engines"
|
760 |
+
),
|
761 |
+
gr.Slider(0, 2, value=2, step=1, label="Safe Search Level"),
|
762 |
+
gr.Radio(["GET", "POST"], value="POST", label="HTTP Method"),
|
763 |
+
gr.Slider(0, 1, value=0.2, step=0.1, label="LLM Temperature"),
|
764 |
+
gr.Dropdown(["huggingface", "groq", "mistral"], value="mistral", label="LLM Model"),
|
765 |
+
gr.Checkbox(label="Use PyPDF2 for PDF scraping", value=False),
|
766 |
+
],
|
767 |
+
additional_inputs_accordion=gr.Accordion("⚙️ Advanced Parameters", open=True),
|
768 |
+
retry_btn="Retry",
|
769 |
+
undo_btn="Undo",
|
770 |
+
clear_btn="Clear",
|
771 |
+
chatbot=gr.Chatbot(
|
772 |
+
show_copy_button=True,
|
773 |
+
likeable=True,
|
774 |
+
layout="bubble",
|
775 |
+
height=500,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
776 |
)
|
777 |
+
)
|
778 |
|
779 |
+
if __name__ == "__main__":
|
780 |
+
logger.info("Starting the SearXNG Scraper for Financial News using ChatInterface with Advanced Parameters")
|
781 |
+
iface.launch(share=True)
|