Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

add_poc_french_local_insights

#18

by timeki - opened 3 days ago

base: refs/heads/main

←

from: refs/pr/18

Discussion Files changed

+2472

-1695

Files changed (28) hide show

.gitignore +2 -0
app.py +214 -597
climateqa/chat.py +198 -0
climateqa/constants.py +2 -0
climateqa/engine/chains/answer_rag.py +3 -2
climateqa/engine/chains/graph_retriever.py +3 -1
climateqa/engine/chains/prompts.py +30 -3
climateqa/engine/chains/query_transformation.py +166 -69
climateqa/engine/chains/retrieve_documents.py +220 -58
climateqa/engine/chains/retrieve_papers.py +2 -2
climateqa/engine/graph.py +166 -24
climateqa/engine/reranker.py +5 -0
climateqa/{event_handler.py → handle_stream_events.py} +15 -12
front/deprecated.py +46 -0
front/event_listeners.py +0 -0
front/tabs/__init__.py +6 -0
front/tabs/chat_interface.py +55 -0
front/tabs/main_tab.py +69 -0
front/tabs/tab_about.py +38 -0
front/tabs/tab_config.py +123 -0
front/tabs/tab_examples.py +40 -0
front/tabs/tab_figures.py +31 -0
front/tabs/tab_papers.py +36 -0
front/tabs/tab_recommended_content.py +0 -0
front/utils.py +25 -15
requirements.txt +1 -1
sandbox/20241104 - CQA - StepByStep CQA.ipynb +0 -0
style.css +396 -490

.gitignore CHANGED Viewed

@@ -11,3 +11,5 @@ notebooks/
 data/
 sandbox/

 data/
 sandbox/
+*.db

app.py CHANGED Viewed

@@ -1,54 +1,30 @@
-from climateqa.engine.embeddings import get_embeddings_function
-embeddings_function = get_embeddings_function()
-from sentence_transformers import CrossEncoder
-# reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
-import gradio as gr
-from gradio_modal import Modal
-import pandas as pd
-import numpy as np
 import os
-import time
-import re
-import json
-from gradio import ChatMessage
-# from gradio_modal import Modal
-from io import BytesIO
-import base64
-from datetime import datetime
 from azure.storage.fileshare import ShareServiceClient
-from utils import create_user_id
-from gradio_modal import Modal
-from PIL import Image
-from langchain_core.runnables.schema import StreamEvent
-# ClimateQ&A imports
 from climateqa.engine.llm import get_llm
 from climateqa.engine.vectorstore import get_pinecone_vectorstore
-# from climateqa.knowledge.retriever import ClimateQARetriever
 from climateqa.engine.reranker import get_reranker
-from climateqa.engine.embeddings import get_embeddings_function
-from climateqa.engine.chains.prompts import audience_prompts
-from climateqa.sample_questions import QUESTIONS
-from climateqa.constants import POSSIBLE_REPORTS, OWID_CATEGORIES
-from climateqa.utils import get_image_from_azure_blob_storage
-from climateqa.engine.graph import make_graph_agent
-from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.chains.retrieve_papers import find_papers
-from front.utils import serialize_docs,process_figures
-from climateqa.event_handler import init_audience, handle_retrieved_documents, stream_answer,handle_retrieved_owid_graphs
 # Load environment variables in local mode
 try:
@@ -57,7 +33,6 @@ try:
 except Exception as e:
     pass
-import requests
 # Set up Gradio Theme
 theme = gr.themes.Base(
@@ -66,15 +41,7 @@ theme = gr.themes.Base(
     font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
 )
-init_prompt = ""
-system_template = {
-    "role": "system",
-    "content": init_prompt,
-}
 account_key = os.environ["BLOB_ACCOUNT_KEY"]
 if len(account_key) == 86:
     account_key += "=="
@@ -92,586 +59,236 @@ share_client = service.get_share_client(file_share_name)
 user_id = create_user_id()
-CITATION_LABEL = "BibTeX citation for ClimateQ&A"
-CITATION_TEXT = r"""@misc{climateqa,
-    author={Théo Alves Da Costa, Timothée Bohe},
-    title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
-    year={2024},
-    howpublished= {\url{https://climateqa.com}},
-}
-@software{climateqa,
-    author = {Théo Alves Da Costa, Timothée Bohe},
-    publisher = {ClimateQ&A},
-    title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
-}
-"""
 # Create vectorstore and retriever
-vectorstore = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX"))
-vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name = os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
-reranker = get_reranker("nano")
-agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
-def update_config_modal_visibility(config_open):
-    new_config_visibility_status = not config_open
-    return gr.update(visible=new_config_visibility_status), new_config_visibility_status
-async def chat(query, history, audience, sources, reports, relevant_content_sources, search_only):
-    """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
-    (messages in gradio format, messages in langchain format, source documents)"""
-    date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    print(f">> NEW QUESTION ({date_now}) : {query}")
-    audience_prompt = init_audience(audience)
-    # Prepare default values
-    if sources is None or len(sources) == 0:
-        sources = ["IPCC", "IPBES", "IPOS"]
-    if reports is None or len(reports) == 0:
-        reports = []
-    inputs = {"user_input": query,"audience": audience_prompt,"sources_input":sources, "relevant_content_sources" : relevant_content_sources, "search_only": search_only}
-    result = agent.astream_events(inputs,version = "v1")
-    docs = []
-    used_figures=[]
-    related_contents = []
-    docs_html = ""
-    output_query = ""
-    output_language = ""
-    output_keywords = ""
-    start_streaming = False
-    graphs_html = ""
-    figures = '<div class="figures-container"><p></p> </div>'
-    steps_display = {
-        "categorize_intent":("🔄️ Analyzing user message",True),
-        "transform_query":("🔄️ Thinking step by step to answer the question",True),
-        "retrieve_documents":("🔄️ Searching in the knowledge base",False),
-    }
-    used_documents = []
-    answer_message_content = ""
-    try:
-        async for event in result:
-            if "langgraph_node" in event["metadata"]:
-                node = event["metadata"]["langgraph_node"]
-                if event["event"] == "on_chain_end" and event["name"] == "retrieve_documents" :# when documents are retrieved
-                    docs, docs_html, history, used_documents, related_contents = handle_retrieved_documents(event, history, used_documents)
-                elif event["event"] == "on_chain_end" and node == "categorize_intent" and event["name"] == "_write": # when the query is transformed
-                    intent = event["data"]["output"]["intent"]
-                    if "language" in event["data"]["output"]:
-                        output_language = event["data"]["output"]["language"]
-                    else :
-                        output_language = "English"
-                    history[-1].content = f"Language identified : {output_language} \n Intent identified : {intent}"
-                elif event["name"] in steps_display.keys() and event["event"] == "on_chain_start": #display steps
-                    event_description, display_output = steps_display[node]
-                    if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
-                        history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
-                elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
-                    history, start_streaming, answer_message_content = stream_answer(history, event, start_streaming, answer_message_content)
-                elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
-                    graphs_html = handle_retrieved_owid_graphs(event, graphs_html)
-                if event["name"] == "transform_query" and event["event"] =="on_chain_end":
-                    if hasattr(history[-1],"content"):
-                        history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
-                if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
-                    print("X")
-            yield history, docs_html, output_query, output_language, related_contents , graphs_html,  #,output_query,output_keywords
-    except Exception as e:
-        print(event, "has failed")
-        raise gr.Error(f"{e}")
-    try:
-        # Log answer on Azure Blob Storage
-        if os.getenv("GRADIO_ENV") != "local":
-            timestamp = str(datetime.now().timestamp())
-            file = timestamp + ".json"
-            prompt = history[1]["content"]
-            logs = {
-                "user_id": str(user_id),
-                "prompt": prompt,
-                "query": prompt,
-                "question":output_query,
-                "sources":sources,
-                "docs":serialize_docs(docs),
-                "answer": history[-1].content,
-                "time": timestamp,
-            }
-            log_on_azure(file, logs, share_client)
-    except Exception as e:
-        print(f"Error logging on Azure Blob Storage: {e}")
-        raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
-    yield history, docs_html, output_query, output_language, related_contents, graphs_html
-def save_feedback(feed: str, user_id):
-    if len(feed) > 1:
-        timestamp = str(datetime.now().timestamp())
-        file = user_id + timestamp + ".json"
-        logs = {
-            "user_id": user_id,
-            "feedback": feed,
-            "time": timestamp,
-        }
-        log_on_azure(file, logs, share_client)
-        return "Feedback submitted, thank you!"
-def log_on_azure(file, logs, share_client):
-    logs = json.dumps(logs)
-    file_client = share_client.get_file_client(file)
-    file_client.upload_file(logs)
 # --------------------------------------------------------------------
 # Gradio
 # --------------------------------------------------------------------
-init_prompt = """
-Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
-❓ How to use
-- **Language**: You can ask me your questions in any language.
-- **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
-- **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
-- **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
-⚠️ Limitations
-*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
-🛈 Information
-Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
-What do you want to learn ?
-"""
-def vote(data: gr.LikeData):
-    if data.liked:
-        print(data.value)
-    else:
-        print(data)
-def save_graph(saved_graphs_state, embedding, category):
-    print(f"\nCategory:\n{saved_graphs_state}\n")
-    if category not in saved_graphs_state:
-        saved_graphs_state[category] = []
-    if embedding not in saved_graphs_state[category]:
-        saved_graphs_state[category].append(embedding)
-    return saved_graphs_state, gr.Button("Graph Saved")
-with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
-    chat_completed_state = gr.State(0)
     current_graphs = gr.State([])
-    saved_graphs = gr.State({})
-    config_open = gr.State(False)
-    with gr.Tab("ClimateQ&A"):
         with gr.Row(elem_id="chatbot-row"):
             with gr.Column(scale=2):
-                chatbot = gr.Chatbot(
-                    value = [ChatMessage(role="assistant", content=init_prompt)],
-                    type = "messages",
-                    show_copy_button=True,
-                    show_label = False,
-                    elem_id="chatbot",
-                    layout = "panel",
-                    avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
-                    max_height="80vh",
-                    height="100vh"
-                )
-                # bot.like(vote,None,None)
-                with gr.Row(elem_id = "input-message"):
-                    textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=7,lines = 1,interactive = True,elem_id="input-textbox")
-                    config_button = gr.Button("",elem_id="config-button")
-                    # config_checkbox_button = gr.Checkbox(label = '⚙️', value="show",visible=True, interactive=True, elem_id="checkbox-config")
-            with gr.Column(scale=2, variant="panel",elem_id = "right-panel"):
-                with gr.Tabs(elem_id = "right_panel_tab") as tabs:
-                    with gr.TabItem("Examples",elem_id = "tab-examples",id = 0):
-                        examples_hidden = gr.Textbox(visible = False)
-                        first_key = list(QUESTIONS.keys())[0]
-                        dropdown_samples = gr.Dropdown(QUESTIONS.keys(),value = first_key,interactive = True,show_label = True,label = "Select a category of sample questions",elem_id = "dropdown-samples")
-                        samples = []
-                        for i,key in enumerate(QUESTIONS.keys()):
-                            examples_visible = True if i == 0 else False
-                            with gr.Row(visible = examples_visible) as group_examples:
-                                examples_questions = gr.Examples(
-                                    QUESTIONS[key],
-                                    [examples_hidden],
-                                    examples_per_page=8,
-                                    run_on_click=False,
-                                    elem_id=f"examples{i}",
-                                    api_name=f"examples{i}",
-                                    # label = "Click on the example question or enter your own",
-                                    # cache_examples=True,
-                                )
-                            samples.append(group_examples)
-                    # with gr.Tab("Configuration", id = 10, ) as tab_config:
-                    #         # gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
-                    #     pass
-                            # with gr.Row():
-                            #     dropdown_sources = gr.CheckboxGroup(
-                            #         ["IPCC", "IPBES","IPOS"],
-                            #         label="Select source",
-                            #         value=["IPCC"],
-                            #         interactive=True,
-                            #     )
-                            #     dropdown_external_sources = gr.CheckboxGroup(
-                            #         ["IPCC figures","OpenAlex", "OurWorldInData"],
-                            #         label="Select database to search for relevant content",
-                            #         value=["IPCC figures"],
-                            #         interactive=True,
-                            #     )
-                            # dropdown_reports = gr.Dropdown(
-                            #     POSSIBLE_REPORTS,
-                            #     label="Or select specific reports",
-                            #     multiselect=True,
-                            #     value=None,
-                            #     interactive=True,
-                            # )
-                            # search_only = gr.Checkbox(label="Search only without chating", value=False, interactive=True, elem_id="checkbox-chat")
-                            # dropdown_audience = gr.Dropdown(
-                            #     ["Children","General public","Experts"],
-                            #     label="Select audience",
-                            #     value="Experts",
-                            #     interactive=True,
-                            # )
-                            # after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
-                            # output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
-                            # output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
-                            # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
-                            # # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
-                    with gr.Tab("Sources",elem_id = "tab-sources",id = 1) as tab_sources:
-                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
-                    with gr.Tab("Recommended content", elem_id="tab-recommended_content",id=2) as tab_recommended_content:
-                        with gr.Tabs(elem_id = "group-subtabs") as tabs_recommended_content:
-                            with gr.Tab("Figures",elem_id = "tab-figures",id = 3) as tab_figures:
-                                sources_raw = gr.State()
-                                with Modal(visible=False, elem_id="modal_figure_galery") as figure_modal:
-                                    gallery_component = gr.Gallery(object_fit='scale-down',elem_id="gallery-component", height="80vh")
-                                show_full_size_figures = gr.Button("Show figures in full size",elem_id="show-figures",interactive=True)
-                                show_full_size_figures.click(lambda : Modal(visible=True),None,figure_modal)
-                                figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
-                            with gr.Tab("Papers",elem_id = "tab-citations",id = 4) as tab_papers:
-                                # btn_summary = gr.Button("Summary")
-                                # Simulated window for the Summary
-                                with gr.Accordion(visible=True, elem_id="papers-summary-popup", label= "See summary of relevant papers", open= False) as summary_popup:
-                                    papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")
-                                # btn_relevant_papers = gr.Button("Relevant papers")
-                                # Simulated window for the Relevant Papers
-                                with gr.Accordion(visible=True, elem_id="papers-relevant-popup",label= "See relevant papers", open= False) as relevant_popup:
-                                    papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
-                                btn_citations_network = gr.Button("Explore papers citations network")
-                                # Simulated window for the Citations Network
-                                with Modal(visible=False) as papers_modal:
-                                    citations_network = gr.HTML("<h3>Citations Network Graph</h3>", visible=True, elem_id="papers-citations-network")
-                                btn_citations_network.click(lambda: Modal(visible=True), None, papers_modal)
                             with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
-                                graphs_container = gr.HTML("<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",elem_id="graphs-container")
-                                current_graphs.change(lambda x : x, inputs=[current_graphs], outputs=[graphs_container])
-            with Modal(visible=False,elem_id="modal-config") as config_modal:
-                gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
-                # with gr.Row():
-                dropdown_sources = gr.CheckboxGroup(
-                    ["IPCC", "IPBES","IPOS"],
-                    label="Select source (by default search in all sources)",
-                    value=["IPCC"],
-                    interactive=True,
-                )
-                dropdown_reports = gr.Dropdown(
-                    POSSIBLE_REPORTS,
-                    label="Or select specific reports",
-                    multiselect=True,
-                    value=None,
-                    interactive=True,
-                )
-                dropdown_external_sources = gr.CheckboxGroup(
-                    ["IPCC figures","OpenAlex", "OurWorldInData"],
-                    label="Select database to search for relevant content",
-                    value=["IPCC figures"],
-                    interactive=True,
-                )
-                search_only = gr.Checkbox(label="Search only for recommended content without chating", value=False, interactive=True, elem_id="checkbox-chat")
-                dropdown_audience = gr.Dropdown(
-                    ["Children","General public","Experts"],
-                    label="Select audience",
-                    value="Experts",
-                    interactive=True,
-                )
-                after = gr.Slider(minimum=1950,maximum=2023,step=1,value=1960,label="Publication date",show_label=True,interactive=True,elem_id="date-papers", visible=False)
-                output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False, visible= False)
-                output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False, visible= False)
-                dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after])
-                close_config_modal = gr.Button("Validate and Close",elem_id="close-config-modal")
-                close_config_modal.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
-                # dropdown_external_sources.change(lambda x: gr.update(visible = True ) if "OpenAlex" in x else gr.update(visible=False) , inputs=[dropdown_external_sources], outputs=[after], visible=True)
-            config_button.click(fn=update_config_modal_visibility, inputs=[config_open], outputs=[config_modal, config_open])
-                    # with gr.Tab("OECD",elem_id = "tab-oecd",id = 6):
-                    #     oecd_indicator = "RIVER_FLOOD_RP100_POP_SH"
-                    #     oecd_topic = "climate"
-                    #     oecd_latitude = "46.8332"
-                    #     oecd_longitude = "5.3725"
-                    #     oecd_zoom = "5.6442"
-                    #     # Create the HTML content with the iframe
-                    #     iframe_html = f"""
-                    #     <iframe src="https://localdataportal.oecd.org/maps.html?indicator={oecd_indicator}&topic={oecd_topic}&latitude={oecd_latitude}&longitude={oecd_longitude}&zoom={oecd_zoom}"
-                    #             width="100%" height="600" frameborder="0" style="border:0;" allowfullscreen></iframe>
-                    #     """
-                    #     oecd_textbox = gr.HTML(iframe_html, show_label=False, elem_id="oecd-textbox")
-#---------------------------------------------------------------------------------------
-# OTHER TABS
-#---------------------------------------------------------------------------------------
-    # with gr.Tab("Settings",elem_id = "tab-config",id = 2):
-    #     gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
-    #     dropdown_sources = gr.CheckboxGroup(
-    #         ["IPCC", "IPBES","IPOS", "OpenAlex"],
-    #         label="Select source",
-    #         value=["IPCC"],
-    #         interactive=True,
-    #     )
-    #     dropdown_reports = gr.Dropdown(
-    #         POSSIBLE_REPORTS,
-    #         label="Or select specific reports",
-    #         multiselect=True,
-    #         value=None,
-    #         interactive=True,
-    #     )
-    #     dropdown_audience = gr.Dropdown(
-    #         ["Children","General public","Experts"],
-    #         label="Select audience",
-    #         value="Experts",
-    #         interactive=True,
-    #     )
-    #     output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
-    #     output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
-    with gr.Tab("About",elem_classes = "max-height other-tabs"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown(
-                    """
-                    ### More info
-                    - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
-                    - Feedbacks on this [form](https://forms.office.com/e/1Yzgxm6jbp)
-                    ### Citation
-                    """
-                )
-                with gr.Accordion(CITATION_LABEL,elem_id="citation", open = False,):
-                    # Display citation label and text
-                    gr.Textbox(
-                        value=CITATION_TEXT,
-                        label="",
-                        interactive=False,
-                        show_copy_button=True,
-                        lines=len(CITATION_TEXT.split('\n')),
-                    )
-    def start_chat(query,history,search_only):
-        history = history + [ChatMessage(role="user", content=query)]
-        if not search_only:
-            return (gr.update(interactive = False),gr.update(selected=1),history)
-        else:
-            return (gr.update(interactive = False),gr.update(selected=2),history)
-    def finish_chat():
-        return gr.update(interactive = True,value = "")
-    # Initialize visibility states
-    summary_visible = False
-    relevant_visible = False
-    # Functions to toggle visibility
-    def toggle_summary_visibility():
-        global summary_visible
-        summary_visible = not summary_visible
-        return gr.update(visible=summary_visible)
-    def toggle_relevant_visibility():
-        global relevant_visible
-        relevant_visible = not relevant_visible
-        return gr.update(visible=relevant_visible)
-    def change_completion_status(current_state):
-        current_state = 1 - current_state
-        return current_state
-    def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
-        sources_number = sources_textbox.count("<h2>")
-        figures_number = figures_cards.count("<h2>")
-        graphs_number = current_graphs.count("<iframe")
-        papers_number = papers_html.count("<h2>")
-        sources_notif_label = f"Sources ({sources_number})"
-        figures_notif_label = f"Figures ({figures_number})"
-        graphs_notif_label = f"Graphs ({graphs_number})"
-        papers_notif_label = f"Papers ({papers_number})"
-        recommended_content_notif_label = f"Recommended content ({figures_number + graphs_number + papers_number})"
-        return gr.update(label = recommended_content_notif_label), gr.update(label = sources_notif_label), gr.update(label = figures_notif_label), gr.update(label = graphs_notif_label), gr.update(label = papers_notif_label)
-    (textbox
-        .submit(start_chat, [textbox,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
-        .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
-        .then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
-        # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
-    )
-    (examples_hidden
-        .change(start_chat, [examples_hidden,chatbot, search_only], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
-        .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, dropdown_external_sources, search_only] ,[chatbot,sources_textbox,output_query,output_language, sources_raw, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
-        .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
-        # .then(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_sources, tab_figures, tab_graphs, tab_papers] )
-    )
-    def change_sample_questions(key):
-        index = list(QUESTIONS.keys()).index(key)
-        visible_bools = [False] * len(samples)
-        visible_bools[index] = True
-        return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
-    sources_raw.change(process_figures, inputs=[sources_raw], outputs=[figures_cards, gallery_component])
-    # update sources numbers
-    sources_textbox.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-    figures_cards.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-    current_graphs.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-    papers_html.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs,papers_html],[tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
-    # other questions examples
-    dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
-    # search for papers
-    textbox.submit(find_papers,[textbox,after, dropdown_external_sources], [papers_html,citations_network,papers_summary])
-    examples_hidden.change(find_papers,[examples_hidden,after,dropdown_external_sources], [papers_html,citations_network,papers_summary])
-    # btn_summary.click(toggle_summary_visibility, outputs=summary_popup)
-    # btn_relevant_papers.click(toggle_relevant_visibility, outputs=relevant_popup)
-    demo.queue()
 demo.launch(ssr_mode=False)

+# Import necessary libraries
 import os
+import gradio as gr
 from azure.storage.fileshare import ShareServiceClient
+# Import custom modules
+from climateqa.engine.embeddings import get_embeddings_function
 from climateqa.engine.llm import get_llm
 from climateqa.engine.vectorstore import get_pinecone_vectorstore
 from climateqa.engine.reranker import get_reranker
+from climateqa.engine.graph import make_graph_agent,make_graph_agent_poc
 from climateqa.engine.chains.retrieve_papers import find_papers
+from climateqa.chat import start_chat, chat_stream, finish_chat
+from front.tabs import (create_config_modal, create_examples_tab, create_papers_tab, create_figures_tab, create_chat_interface, create_about_tab)
+from front.utils import process_figures
+from utils import create_user_id
+import logging
+logging.basicConfig(level=logging.WARNING)
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppresses INFO and WARNING logs
+logging.getLogger().setLevel(logging.WARNING)
 # Load environment variables in local mode
 try:
 except Exception as e:
     pass
 # Set up Gradio Theme
 theme = gr.themes.Base(
     font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
 )
+# Azure Blob Storage credentials
 account_key = os.environ["BLOB_ACCOUNT_KEY"]
 if len(account_key) == 86:
     account_key += "=="
 user_id = create_user_id()
 # Create vectorstore and retriever
+embeddings_function = get_embeddings_function()
+# One Pinecone vectorstore per index; the index names are read from environment variables.
+vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX"))
+vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description")
+vectorstore_region = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_REGION"))
 llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
+# The reranker variant depends on the ENV environment variable:
+# "nano" when it equals "GRADIO_ENV", "large" otherwise.
+if os.getenv("ENV")=="GRADIO_ENV":
+    reranker = get_reranker("nano")
+else:
+    reranker = get_reranker("large")
+# Both agents share the same LLM, vectorstores and reranker; they differ in the
+# factory used and in threshold_docs (0.2 for the main agent, 0 for the POC agent).
+agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0.2)
+agent_poc = make_graph_agent_poc(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, vectorstore_region = vectorstore_region, reranker=reranker, threshold_docs=0)#TODO put back default 0.2
+async def chat(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+    """Stream chat updates by delegating to `chat_stream` with the module-level `agent`.
+
+    Every item produced by `chat_stream` is re-yielded unchanged.
+    """
+    print("chat cqa - message received")
+    stream = chat_stream(agent, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id)
+    async for update in stream:
+        yield update
+async def chat_poc(query, history, audience, sources, reports, relevant_content_sources_selection, search_only):
+    """Stream chat updates by delegating to `chat_stream` with the module-level `agent_poc`.
+
+    Every item produced by `chat_stream` is re-yielded unchanged.
+    """
+    print("chat poc - message received")
+    stream = chat_stream(agent_poc, query, history, audience, sources, reports, relevant_content_sources_selection, search_only, share_client, user_id)
+    async for update in stream:
+        yield update
 # --------------------------------------------------------------------
 # Gradio
 # --------------------------------------------------------------------
+# Function to update modal visibility
+def update_config_modal_visibility(config_open):
+    """Toggle the configuration modal.
+
+    Returns a pair: a Gradio update setting the modal's visibility to the
+    negation of `config_open`, and that negated flag itself.
+    """
+    toggled = not config_open
+    return gr.update(visible=toggled), toggled
+def update_sources_number_display(sources_textbox, figures_cards, current_graphs, papers_html):
+    """Build label updates showing how many items each content tab holds.
+
+    Items are counted by occurrences of "<h2>" in the sources, figures and
+    papers values, and of "<iframe" in the graphs value.
+
+    Returns:
+        A 5-tuple of Gradio label updates, in this order: recommended content
+        (figures + graphs + papers), sources, figures, graphs, papers.
+    """
+    n_sources = sources_textbox.count("<h2>")
+    n_figures = figures_cards.count("<h2>")
+    n_graphs = current_graphs.count("<iframe")
+    n_papers = papers_html.count("<h2>")
+    labels = (
+        f"Recommended content ({n_figures + n_graphs + n_papers})",
+        f"Sources ({n_sources})",
+        f"Figures ({n_figures})",
+        f"Graphs ({n_graphs})",
+        f"Papers ({n_papers})",
+    )
+    return tuple(gr.update(label=text) for text in labels)
+# # UI Layout Components
+def cqa_tab(tab_name):
+    """Build the layout of one chat tab and return its components.
+
+    Creates a `gr.Tab` titled `tab_name` containing a chat column on the left
+    and a tabbed right panel (Examples, Sources, and Recommended content with
+    Figures / Papers / Graphs sub-tabs).
+
+    Args:
+        tab_name: Title of the tab to create.
+
+    Returns:
+        A dict mapping names to the created Gradio components. Note that the
+        graphs HTML container is stored under the key "graph_container".
+    """
+    # State variables
     current_graphs = gr.State([])
+    with gr.Tab(tab_name):
         with gr.Row(elem_id="chatbot-row"):
+            # Left column - Chat interface
             with gr.Column(scale=2):
+                chatbot, textbox, config_button = create_chat_interface()
+            # Right column - Content panels
+            with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
+                with gr.Tabs(elem_id="right_panel_tab") as tabs:
+                    # Examples tab
+                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
+                        examples_hidden = create_examples_tab()
+                    # Sources tab
+                    with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
+                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
+                    # Recommended content tab
+                    with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
+                        with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
+                            # Figures subtab
+                            with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
+                                sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()
+                            # Papers subtab
+                            with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
+                                papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
+                            # Graphs subtab
                             with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
+                                graphs_container = gr.HTML(
+                                    "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
+                                    elem_id="graphs-container"
+                                )
+    # used_figures, figure_modal, papers_modal and tabs_recommended_content are
+    # created above but are not part of the returned mapping.
+    return {
+        "chatbot": chatbot,
+        "textbox": textbox,
+        "tabs": tabs,
+        "sources_raw": sources_raw,
+        "new_figures": new_figures,
+        "current_graphs": current_graphs,
+        "examples_hidden": examples_hidden,
+        "sources_textbox": sources_textbox,
+        "figures_cards": figures_cards,
+        "gallery_component": gallery_component,
+        "config_button": config_button,
+        "papers_html": papers_html,
+        "citations_network": citations_network,
+        "papers_summary": papers_summary,
+        "tab_recommended_content": tab_recommended_content,
+        "tab_sources": tab_sources,
+        "tab_figures": tab_figures,
+        "tab_graphs": tab_graphs,
+        "tab_papers": tab_papers,
+        "graph_container": graphs_container
+    }
+def event_handling(
+    main_tab_components,
+    config_components,
+    tab_name="ClimateQ&A"
+):
+    """Wire the Gradio event listeners for one chat tab.
+
+    Args:
+        main_tab_components: Dict of components for the tab, as returned by
+            `cqa_tab`.
+        config_components: Dict of configuration-modal components and state.
+        tab_name: Selects the chat function: "ClimateQ&A" uses `chat`,
+            "Beta - POC Adapt'Action" uses `chat_poc`. For any other value no
+            chat listeners are registered; the remaining listeners still are.
+    """
+    chatbot = main_tab_components["chatbot"]
+    textbox = main_tab_components["textbox"]
+    tabs = main_tab_components["tabs"]
+    sources_raw = main_tab_components["sources_raw"]
+    new_figures = main_tab_components["new_figures"]
+    current_graphs = main_tab_components["current_graphs"]
+    examples_hidden = main_tab_components["examples_hidden"]
+    sources_textbox = main_tab_components["sources_textbox"]
+    figures_cards = main_tab_components["figures_cards"]
+    gallery_component = main_tab_components["gallery_component"]
+    config_button = main_tab_components["config_button"]
+    papers_html = main_tab_components["papers_html"]
+    citations_network = main_tab_components["citations_network"]
+    papers_summary = main_tab_components["papers_summary"]
+    tab_recommended_content = main_tab_components["tab_recommended_content"]
+    tab_sources = main_tab_components["tab_sources"]
+    tab_figures = main_tab_components["tab_figures"]
+    tab_graphs = main_tab_components["tab_graphs"]
+    tab_papers = main_tab_components["tab_papers"]
+    graphs_container = main_tab_components["graph_container"]
+    config_open = config_components["config_open"]
+    config_modal = config_components["config_modal"]
+    dropdown_sources = config_components["dropdown_sources"]
+    dropdown_reports = config_components["dropdown_reports"]
+    dropdown_external_sources = config_components["dropdown_external_sources"]
+    search_only = config_components["search_only"]
+    dropdown_audience = config_components["dropdown_audience"]
+    after = config_components["after"]
+    output_query = config_components["output_query"]
+    output_language = config_components["output_language"]
+    close_config_modal = config_components["close_config_modal_button"]
+    new_sources_hmtl = gr.State([])
+    print("textbox id : ", textbox.elem_id)
+    # Both the config button and the modal's close button toggle the modal
+    for button in [config_button, close_config_modal]:
+        button.click(
+            fn=update_config_modal_visibility,
+            inputs=[config_open],
+            outputs=[config_modal, config_open]
+        )
+    # Pick the streaming chat function for this tab
+    if tab_name == "ClimateQ&A":
+        print("chat cqa - message sent")
+        chat_fn = chat
+    elif tab_name == "Beta - POC Adapt'Action":
+        print("chat poc - message sent")
+        chat_fn = chat_poc
+    else:
+        chat_fn = None
+    # Each query component paired with the listener that starts a query from it:
+    # the textbox is listened to via `.submit`, examples_hidden via `.change`.
+    query_triggers = [(textbox, textbox.submit), (examples_hidden, examples_hidden.change)]
+    if chat_fn is not None:
+        for component, trigger in query_triggers:
+            # start_chat -> streaming chat -> finish_chat (always resets the textbox)
+            (trigger(start_chat, [component, chatbot, search_only], [component, tabs, chatbot, sources_raw], queue=False, api_name=f"start_chat_{component.elem_id}")
+                .then(chat_fn, [component, chatbot, dropdown_audience, dropdown_sources, dropdown_reports, dropdown_external_sources, search_only], [chatbot, new_sources_hmtl, output_query, output_language, new_figures, current_graphs], concurrency_limit=8, api_name=f"chat_{component.elem_id}")
+                .then(finish_chat, None, [textbox], api_name=f"finish_chat_{component.elem_id}")
+            )
+    # Mirror streamed state values into the visible components
+    new_sources_hmtl.change(lambda x : x, inputs = [new_sources_hmtl], outputs = [sources_textbox])
+    current_graphs.change(lambda x: x, inputs=[current_graphs], outputs=[graphs_container])
+    new_figures.change(process_figures, inputs=[sources_raw, new_figures], outputs=[sources_raw, figures_cards, gallery_component])
+    # Update sources numbers
+    for component in [sources_textbox, figures_cards, current_graphs, papers_html]:
+        component.change(update_sources_number_display, [sources_textbox, figures_cards, current_graphs, papers_html], [tab_recommended_content, tab_sources, tab_figures, tab_graphs, tab_papers])
+    # Search for papers, using the same listener per component as for the chat
+    for component, trigger in query_triggers:
+        trigger(find_papers, [component, after, dropdown_external_sources], [papers_html, citations_network, papers_summary])
+def main_ui():
+    """Assemble the Gradio Blocks application and return it.
+
+    Builds the config modal, one chat tab per tab name plus the about tab,
+    wires the event listeners of each chat tab, and enables the queue.
+    """
+    stylesheet = os.getcwd()+ "/style.css"
+    tab_names = ("ClimateQ&A", "Beta - POC Adapt'Action")
+    with gr.Blocks(title="Climate Q&A", css_paths=stylesheet, theme=theme, elem_id="main-component") as demo:
+        config_components = create_config_modal()
+        with gr.Tabs():
+            # Chat tabs are laid out first, in order, followed by the about tab
+            components_by_tab = {name: cqa_tab(tab_name=name) for name in tab_names}
+            create_about_tab()
+        for name, components in components_by_tab.items():
+            event_handling(components, config_components, tab_name=name)
+        demo.queue()
+    return demo
+demo = main_ui()
 demo.launch(ssr_mode=False)

climateqa/chat.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import os
+from datetime import datetime
+import gradio as gr
+# from .agent import agent
+from gradio import ChatMessage
+from langgraph.graph.state import CompiledStateGraph
+import json
+from .handle_stream_events import (
+    init_audience,
+    handle_retrieved_documents,
+    convert_to_docs_to_html,
+    stream_answer,
+    handle_retrieved_owid_graphs,
+    serialize_docs,
+)
+# Function to log data on Azure
+def log_on_azure(file, logs, share_client):
+    """Serialize `logs` to JSON and upload it as `file` through `share_client`."""
+    payload = json.dumps(logs)
+    share_client.get_file_client(file).upload_file(payload)
+# Chat functions
+def start_chat(query, history, search_only):
+    """Append the user's query to the history and prepare the UI for an answer.
+
+    Returns a 4-tuple: an update disabling interaction on the first output,
+    an update selecting tab 2 when `search_only` is truthy and tab 1 otherwise,
+    the extended history, and an empty list.
+    """
+    updated_history = history + [ChatMessage(role="user", content=query)]
+    selected_tab = 2 if search_only else 1
+    return (gr.update(interactive=False), gr.update(selected=selected_tab), updated_history, [])
+def finish_chat():
+    """Return a Gradio update that re-enables interaction and sets the value to an empty string."""
+    reset_update = gr.update(interactive=True, value="")
+    return reset_update
+def log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id):
+    """Upload a JSON record of the interaction, unless GRADIO_ENV is "local".
+
+    The record is uploaded via `log_on_azure` under the name "<timestamp>.json".
+
+    Raises:
+        gr.Error: if building or uploading the record fails for any reason.
+    """
+    try:
+        # Nothing is logged in the local environment
+        if os.getenv("GRADIO_ENV") == "local":
+            return
+        timestamp = str(datetime.now().timestamp())
+        # NOTE(review): the prompt is read from the second history entry with
+        # dict-style access, while the answer below uses attribute access on the
+        # last entry - confirm the element types and the intended index.
+        prompt = history[1]["content"]
+        user = str(user_id)
+        serialized_docs = serialize_docs(docs)
+        answer = history[-1].content
+        logs = {
+            "user_id": user,
+            "prompt": prompt,
+            "query": prompt,
+            "question": output_query,
+            "sources": sources,
+            "docs": serialized_docs,
+            "answer": answer,
+            "time": timestamp,
+        }
+        log_on_azure(f"{timestamp}.json", logs, share_client)
+    except Exception as e:
+        print(f"Error logging on Azure Blob Storage: {e}")
+        error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
+        raise gr.Error(error_msg)
+# Main chat function
+async def chat_stream(
+    agent : CompiledStateGraph,
+    query: str,
+    history: list[ChatMessage],
+    audience: str,
+    sources: list[str],
+    reports: list[str],
+    relevant_content_sources_selection: list[str],
+    search_only: bool,
+    share_client,
+    user_id: str
+) -> tuple[list, str, str, str, list, str]:
+    """Process a chat query and stream the response with relevant sources and visualizations.
+
+    This is an async generator: it yields one tuple per agent event, then logs
+    the interaction and yields a final tuple. The annotated return type
+    describes the shape of each yielded tuple.
+
+    Args:
+        agent (CompiledStateGraph): Graph agent whose events are streamed
+        query (str): The user's question
+        history (list): Chat message history
+        audience (str): Target audience type
+        sources (list): Knowledge base sources to search (defaults to IPCC and IPBES when empty)
+        reports (list): Specific reports to search within sources
+        relevant_content_sources_selection (list): Types of content to retrieve (figures, papers, etc)
+        search_only (bool): Whether to only search without generating answer
+        share_client: Client passed to `log_interaction_to_azure` for uploading the log
+        user_id (str): Identifier stored in the interaction log
+    Yields:
+        tuple: Contains:
+            - history: Updated chat history
+            - docs_html: HTML of retrieved documents
+            - output_query: Processed query
+            - output_language: Detected language
+            - related_contents: Related content
+            - graphs_html: HTML of relevant graphs
+    Raises:
+        gr.Error: If processing an agent event fails.
+    """
+    # Log incoming question
+    date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    print(f">> NEW QUESTION ({date_now}) : {query}")
+    audience_prompt = init_audience(audience)
+    sources = sources or ["IPCC", "IPBES"]
+    reports = reports or []
+    # Prepare inputs for agent
+    inputs = {
+        "user_input": query,
+        "audience": audience_prompt,
+        "sources_input": sources,
+        "relevant_content_sources_selection": relevant_content_sources_selection,
+        "search_only": search_only,
+        "reports": reports
+    }
+    # Get streaming events from agent
+    result = agent.astream_events(inputs, version="v1")
+    # Initialize state variables
+    docs = []
+    related_contents = []
+    docs_html = ""
+    # output_query is never reassigned below, so the empty string is what gets
+    # yielded and logged
+    output_query = ""
+    output_language = ""
+    start_streaming = False
+    graphs_html = ""
+    used_documents = []
+    retrieved_contents = []
+    answer_message_content = ""
+    # Bound before the loop so the error handler can report it even when the
+    # failure happens before the first event arrives
+    event = None
+    # Define processing steps
+    steps_display = {
+        "categorize_intent": ("🔄️ Analyzing user message", True),
+        "transform_query": ("🔄️ Thinking step by step to answer the question", True),
+        "retrieve_documents": ("🔄️ Searching in the knowledge base", False),
+        "retrieve_local_data": ("🔄️ Searching in the knowledge base", False),
+    }
+    try:
+        # Process streaming events
+        async for event in result:
+            if "langgraph_node" in event["metadata"]:
+                node = event["metadata"]["langgraph_node"]
+                # Handle document retrieval
+                if event["event"] == "on_chain_end" and event["name"] in ["retrieve_documents","retrieve_local_data"] and event["data"]["output"] is not None:
+                    history, used_documents, retrieved_contents = handle_retrieved_documents(
+                        event, history, used_documents, retrieved_contents
+                    )
+                if event["event"] == "on_chain_end" and event["name"] == "answer_search" :
+                    docs = event["data"]["input"]["documents"]
+                    docs_html = convert_to_docs_to_html(docs)
+                    related_contents = event["data"]["input"]["related_contents"]
+                # Handle intent categorization
+                elif (event["event"] == "on_chain_end" and
+                      node == "categorize_intent" and
+                      event["name"] == "_write"):
+                    intent = event["data"]["output"]["intent"]
+                    output_language = event["data"]["output"].get("language", "English")
+                    history[-1].content = f"Language identified: {output_language}\nIntent identified: {intent}"
+                # Handle processing steps display
+                elif event["name"] in steps_display and event["event"] == "on_chain_start":
+                    # Look up with the same key the guard just checked, so the
+                    # lookup cannot fail when the event name and node differ
+                    event_description, _ = steps_display[event["name"]]
+                    # Only open a new step message when the last one is not
+                    # already showing this step
+                    if (not hasattr(history[-1], 'metadata') or
+                        history[-1].metadata["title"] != event_description):
+                        history.append(ChatMessage(
+                            role="assistant",
+                            content="",
+                            metadata={'title': event_description}
+                        ))
+                # Handle answer streaming
+                elif (event["name"] != "transform_query" and
+                      event["event"] == "on_chat_model_stream" and
+                      node in ["answer_rag","answer_rag_no_docs", "answer_search", "answer_chitchat"]):
+                    history, start_streaming, answer_message_content = stream_answer(
+                        history, event, start_streaming, answer_message_content
+                    )
+                # Handle graph retrieval
+                elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
+                    graphs_html = handle_retrieved_owid_graphs(event, graphs_html)
+                # Handle query transformation
+                if event["name"] == "transform_query" and event["event"] == "on_chain_end":
+                    if hasattr(history[-1], "content"):
+                        sub_questions = [q["question"] + "-> relevant sources : " + str(q["sources"]) for q in event["data"]["output"]["questions_list"]]
+                        history[-1].content += "Decompose question into sub-questions:\n\n - " + "\n - ".join(sub_questions)
+            yield history, docs_html, output_query, output_language, related_contents, graphs_html
+    except Exception as e:
+        print(f"Event {event} has failed")
+        raise gr.Error(str(e)) from e
+    # Call the function to log interaction
+    log_interaction_to_azure(history, output_query, sources, docs, share_client, user_id)
+    yield history, docs_html, output_query, output_language, related_contents, graphs_html

climateqa/constants.py CHANGED Viewed

@@ -1,4 +1,6 @@
 POSSIBLE_REPORTS = [
     "IPCC AR6 WGI SPM",
     "IPCC AR6 WGI FR",
     "IPCC AR6 WGI TS",

 POSSIBLE_REPORTS = [
+    "IPBES IABWFH SPM",
+    "IPBES CBL SPM",
     "IPCC AR6 WGI SPM",
     "IPCC AR6 WGI FR",
     "IPCC AR6 WGI TS",

climateqa/engine/chains/answer_rag.py CHANGED Viewed

@@ -11,7 +11,7 @@ import time
 from ..utils import rename_chain, pass_values
-DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 def _combine_documents(
     docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
@@ -61,10 +61,11 @@ def make_rag_node(llm,with_docs = True):
         rag_chain = make_rag_chain(llm)
     else:
         rag_chain = make_rag_chain_without_docs(llm)
     async def answer_rag(state,config):
         print("---- Answer RAG ----")
         start_time = time.time()
         answer = await rag_chain.ainvoke(state,config)

 from ..utils import rename_chain, pass_values
+DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="Source : {source} - Content : {page_content}")
 def _combine_documents(
     docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, sep="\n\n"
         rag_chain = make_rag_chain(llm)
     else:
         rag_chain = make_rag_chain_without_docs(llm)
     async def answer_rag(state,config):
         print("---- Answer RAG ----")
         start_time = time.time()
+        print("Sources used : " +  "\n".join([x.metadata["short_name"] + " - page " + str(x.metadata["page_number"])  for x in state["documents"]]))
         answer = await rag_chain.ainvoke(state,config)

climateqa/engine/chains/graph_retriever.py CHANGED Viewed

@@ -50,7 +50,9 @@ def make_graph_retriever_node(vectorstore, reranker, rerank_by_question=True, k_
         print("---- Retrieving graphs ----")
         POSSIBLE_SOURCES = ["IEA", "OWID"]
-        questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[]  else [state["query"]]
         # sources_input = state["sources_input"]
         sources_input = ["auto"]

         print("---- Retrieving graphs ----")
         POSSIBLE_SOURCES = ["IEA", "OWID"]
+        # questions = state["remaining_questions"] if state["remaining_questions"] is not None and state["remaining_questions"]!=[]  else [state["query"]]
+        questions = state["questions_list"] if state["questions_list"] is not None and state["questions_list"]!=[]  else [state["query"]]
         # sources_input = state["sources_input"]
         sources_input = ["auto"]

climateqa/engine/chains/prompts.py CHANGED Viewed

@@ -36,13 +36,40 @@ You are given a question and extracted passages of the IPCC and/or IPBES reports
 """
 answer_prompt_template = """
-You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of the IPCC and/or IPBES reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
 Guidelines:
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
-- Do not use the sentence 'Doc i says ...' to say where information came from.
 - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
 - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
 - If it makes sense, use bullet points and lists to make your answers easier to understand.
@@ -51,6 +78,7 @@ Guidelines:
 - Consider by default that the question is about the past century unless it is specified otherwise.
 - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
 -----------------------
 Passages:
 {context}
@@ -60,7 +88,6 @@ Question: {query} - Explained to {audience}
 Answer in {language} with the passages citations:
 """
 papers_prompt_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted abstracts of scientific papers. Provide a clear and structured answer based on the abstracts provided, the context and the guidelines.

 """
+# answer_prompt_template_old = """
+# You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
+# Guidelines:
+# - If the passages have useful facts or numbers, use them in your answer.
+# - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
+# - Do not use the sentence 'Doc i says ...' to say where information came from.
+# - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
+# - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
+# - If it makes sense, use bullet points and lists to make your answers easier to understand.
+# - You do not need to use every passage. Only use the ones that help answer the question.
+# - If the documents do not have the information needed to answer the question, just say you do not have enough information.
+# - Consider by default that the question is about the past century unless it is specified otherwise.
+# - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
+# -----------------------
+# Passages:
+# {context}
+# -----------------------
+# Question: {query} - Explained to {audience}
+# Answer in {language} with the passages citations:
+# """
 answer_prompt_template = """
+You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted passages of reports. Provide a clear and structured answer based on the passages provided, the context and the guidelines.
 Guidelines:
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
+- You will receive passages from different reports, eg IPCC and PPCP, make separate paragraphs and specify the source of the information in your answer, eg "According to IPCC, ...".
+- The different sources are IPCC, IPBES, PPCP (for Plan Climat Air Energie Territorial de Paris), PBDP (for Plan Biodiversité de Paris), Acclimaterra.
+- Do not mention that you are using specific extract documents, but mention only the source information. "According to IPCC, ..." rather than "According to the provided document from IPCC ..."
+- Make a clear distinction between information from IPCC, IPBES, Acclimaterra that are scientific reports and PPCP, PBDP that are strategic reports. Strategic reports should not be taken has verified facts, but as political or strategic decisions.
 - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k]
 - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
 - If it makes sense, use bullet points and lists to make your answers easier to understand.
 - Consider by default that the question is about the past century unless it is specified otherwise.
 - If the passage is the caption of a picture, you can still use it as part of your answer as any other document.
 -----------------------
 Passages:
 {context}
 Answer in {language} with the passages citations:
 """
 papers_prompt_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. You are given a question and extracted abstracts of scientific papers. Provide a clear and structured answer based on the abstracts provided, the context and the guidelines.

climateqa/engine/chains/query_transformation.py CHANGED Viewed

@@ -7,43 +7,7 @@ from langchain.prompts import ChatPromptTemplate
 from langchain_core.utils.function_calling import convert_to_openai_function
 from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
-ROUTING_INDEX = {
-    "Vector":["IPCC","IPBES","IPOS"],
-    "OpenAlex":["OpenAlex"],
-}
-POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
-# Prompt from the original paper https://arxiv.org/pdf/2305.14283
-# Query Rewriting for Retrieval-Augmented Large Language Models
-class QueryDecomposition(BaseModel):
-    """
-    Decompose the user query into smaller parts to think step by step to answer this question
-    Act as a simple planning agent
-    """
-    questions: List[str] = Field(
-        description="""
-        Think step by step to answer this question, and provide one or several search engine questions in English for knowledge that you need.
-        Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
-        - If it's already a standalone and explicit question, just return the reformulated question for the search engine
-        - If you need to decompose the question, output a list of maximum 2 to 3 questions
-    """
-    )
-class Location(BaseModel):
-    country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
-    location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
-class QueryAnalysis(BaseModel):
-    """
-    Analyzing the user query to extract topics, sources and date
-    Also do query expansion to get alternative search queries
-    Also provide simple keywords to feed a search engine
-    """
     # keywords: List[str] = Field(
     #     description="""
     #     Extract the keywords from the user query to feed a search engine as a list
@@ -68,17 +32,10 @@ class QueryAnalysis(BaseModel):
     #     This questions should help you get more context and information about the user query
     #     """
     # )
-    sources: List[Literal["IPCC", "IPBES", "IPOS"]] = Field( #,"OpenAlex"]] = Field(
-        ...,
-        description="""
-            Given a user question choose which documents would be most relevant for answering their question,
-            - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
-            - IPBES is for questions about biodiversity and nature
-            - IPOS is for questions about the ocean and deep sea mining
-        """,
-            # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific litterature
-    )
     # topics: List[Literal[
     #     "Climate change",
     #     "Biodiversity",
@@ -101,7 +58,82 @@ class QueryAnalysis(BaseModel):
     # location:Location
 def make_query_decomposition_chain(llm):
     openai_functions = [convert_to_openai_function(QueryDecomposition)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryDecomposition"})
@@ -115,7 +147,8 @@ def make_query_decomposition_chain(llm):
     return chain
-def make_query_rewriter_chain(llm):
     openai_functions = [convert_to_openai_function(QueryAnalysis)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
@@ -123,7 +156,7 @@ def make_query_rewriter_chain(llm):
     prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a helpful assistant, you will analyze, translate and reformulate the user input message using the function provided"),
         ("user", "input: {input}")
     ])
@@ -132,22 +165,63 @@ def make_query_rewriter_chain(llm):
     return chain
 def make_query_transform_node(llm,k_final=15):
     decomposition_chain = make_query_decomposition_chain(llm)
-    rewriter_chain = make_query_rewriter_chain(llm)
     def transform_query(state):
         print("---- Transform query ----")
-        if "sources_auto" not in state or state["sources_auto"] is None or state["sources_auto"] is False:
-            auto_mode = False
-        else:
-            auto_mode = True
-        sources_input = state.get("sources_input")
-        if sources_input is None: sources_input = ROUTING_INDEX["Vector"]
         new_state = {}
@@ -155,24 +229,41 @@ def make_query_transform_node(llm,k_final=15):
         decomposition_output = decomposition_chain.invoke({"input":state["query"]})
         new_state.update(decomposition_output)
         # Query Analysis
         questions = []
         for question in new_state["questions"]:
             question_state = {"question":question}
-            analysis_output = rewriter_chain.invoke({"input":question})
             # TODO WARNING llm should always return smthg
-            # The case when the llm does not return any sources
-            if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
-                analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
-            question_state.update(analysis_output)
-            questions.append(question_state)
         # Explode the questions into multiple questions with different sources
         new_questions = []
         for q in questions:
-            question,sources = q["question"],q["sources"]
             # If not auto mode we take the configuration
             if not auto_mode:
@@ -181,7 +272,7 @@ def make_query_transform_node(llm,k_final=15):
             for index,index_sources in ROUTING_INDEX.items():
                 selected_sources = list(set(sources).intersection(index_sources))
                 if len(selected_sources) > 0:
-                    new_questions.append({"question":question,"sources":selected_sources,"index":index})
         # # Add the number of questions to search
         # k_by_question = k_final // len(new_questions)
@@ -191,10 +282,16 @@ def make_query_transform_node(llm,k_final=15):
         # new_state["questions"] = new_questions
         # new_state["remaining_questions"] = new_questions
         new_state = {
-            "remaining_questions":new_questions,
-            "n_questions":len(new_questions),
         }
         return new_state

 from langchain_core.utils.function_calling import convert_to_openai_function
 from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+# OLD QUERY ANALYSIS
     # keywords: List[str] = Field(
     #     description="""
     #     Extract the keywords from the user query to feed a search engine as a list
     #     This questions should help you get more context and information about the user query
     #     """
     # )
+    # - OpenAlex is for any other questions that are not in the previous categories but could be found in the scientific litterature
+    #
     # topics: List[Literal[
     #     "Climate change",
     #     "Biodiversity",
     # location:Location
+ROUTING_INDEX = {
+    "IPx":["IPCC", "IPBES", "IPOS"],
+    "POC": ["AcclimaTerra", "PCAET","Biodiv"],
+    "OpenAlex":["OpenAlex"],
+}
+POSSIBLE_SOURCES = [y for values in ROUTING_INDEX.values() for y in values]
+# Prompt from the original paper https://arxiv.org/pdf/2305.14283
+# Query Rewriting for Retrieval-Augmented Large Language Models
+class QueryDecomposition(BaseModel):
+    """
+    Decompose the user query into smaller parts to think step by step to answer this question
+    Act as a simple planning agent
+    """
+    questions: List[str] = Field(
+        description="""
+        Think step by step to answer this question, and provide one or several search engine questions in the provided language for knowledge that you need.
+        Suppose that the user is looking for information about climate change, energy, biodiversity, nature, and everything we can find the IPCC reports and scientific literature
+        - If it's already a standalone and explicit question, just return the reformulated question for the search engine
+        - If you need to decompose the question, output a list of maximum 2 to 3 questions
+    """
+    )
+class Location(BaseModel):
+    country:str = Field(...,description="The country if directly mentioned or inferred from the location (cities, regions, adresses), ex: France, USA, ...")
+    location:str = Field(...,description="The specific place if mentioned (cities, regions, addresses), ex: Marseille, New York, Wisconsin, ...")
class QueryTranslation(BaseModel):
    """Translate the query into a given language"""

    # The description below is prompt text: this schema is converted to an
    # OpenAI function by make_query_translation_chain, so wording matters.
    question: str = Field(
        description="""
        Translate the question into the given language
        If the question is already in the given language, just return the same question
        """,
    )
class QueryAnalysis(BaseModel):
    """
    Analyze the user query to extract the relevant sources
    """

    # NOTE: an earlier version of this schema also extracted topics and dates,
    # produced alternative search queries and search keywords. Those fields are
    # commented out, so the matching instructions are kept out of the docstring
    # (the schema is converted to an OpenAI function and its docstring is
    # presumably forwarded to the model as the function description).

    # OpenAlex is intentionally not offered as a choice here.
    sources: List[Literal["IPCC", "IPBES", "IPOS", "AcclimaTerra", "PCAET", "Biodiv"]] = Field(
        ...,
        description="""
            Given a user question choose which documents would be most relevant for answering their question,
            - IPCC is for questions about climate change, energy, impacts, and everything we can find the IPCC reports
            - IPBES is for questions about biodiversity and nature
            - IPOS is for questions about the ocean and deep sea mining
            - AcclimaTerra is for questions about any specific place in, or close to, the french region "Nouvelle-Aquitaine"
            - PCAET is the Plan Climat Energie Territorial for the city of Paris
            - Biodiv is the Biodiversity plan for the city of Paris
        """,
    )
 def make_query_decomposition_chain(llm):
+    """Chain to decompose a query into smaller parts to think step by step to answer this question
+    Args:
+        llm (_type_): _description_
+    Returns:
+        _type_: _description_
+    """
     openai_functions = [convert_to_openai_function(QueryDecomposition)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryDecomposition"})
     return chain
+def make_query_analysis_chain(llm):
+    """Analyze the user query to extract the relevant sources"""
     openai_functions = [convert_to_openai_function(QueryAnalysis)]
     llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryAnalysis"})
     prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant, you will analyze the user input message using the function provided"),
         ("user", "input: {input}")
     ])
     return chain
def make_query_translation_chain(llm):
    """Build a chain that translates a question into a target language.

    The chain is invoked with two keys: ``input`` (the question to translate)
    and ``language`` (the target language, injected into the system prompt).
    The model is forced to call the ``QueryTranslation`` function and the
    function arguments are parsed as JSON, so callers read the translated text
    from the ``"question"`` key of the result.

    Args:
        llm: The chat model to bind the ``QueryTranslation`` function to.

    Returns:
        A runnable chain ``prompt | llm_with_functions | JsonOutputFunctionsParser()``.
    """
    openai_functions = [convert_to_openai_function(QueryTranslation)]
    # Force the function call so the output always follows the schema
    llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"QueryTranslation"})

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant, translate the question into {language}"),
        ("user", "input: {input}")
    ])

    return prompt | llm_with_functions | JsonOutputFunctionsParser()
def group_by_sources_types(sources):
    """Group source names by the type of index they belong to.

    Args:
        sources: Iterable of source names (e.g. ``"IPCC"``, ``"PCAET"``).

    Returns:
        dict: Maps ``"IPx"`` and/or ``"POC"`` to the list of matching sources.
        A key is present only when at least one of its sources was requested.
        Within each list, sources keep the order in which they first appear in
        ``sources`` and duplicates are dropped, so the result is deterministic.
        Unknown source names are ignored.
    """
    groups = (
        ("IPx", ("IPCC", "IPBES", "IPOS")),
        ("POC", ("AcclimaTerra", "PCAET", "Biodiv")),
    )

    sources_types = {}
    for source_type, known_sources in groups:
        # dict.fromkeys de-duplicates while preserving first-seen order
        selected = list(dict.fromkeys(s for s in sources if s in known_sources))
        if selected:
            sources_types[source_type] = selected
    return sources_types
 def make_query_transform_node(llm,k_final=15):
+    """
+    Creates a query transformation node that processes and transforms a given query state.
+    Args:
+        llm: The language model to be used for query decomposition and rewriting.
+        k_final (int, optional): The final number of questions to be generated. Defaults to 15.
+    Returns:
+        function: A function that takes a query state and returns a transformed state.
+    The returned function performs the following steps:
+        1. Checks if the query should be processed in auto mode based on the state.
+        2. Retrieves the input sources from the state or defaults to a predefined routing index.
+        3. Decomposes the query using the decomposition chain.
+        4. Analyzes each decomposed question using the rewriter chain.
+        5. Ensures that the sources returned by the language model are valid.
+        6. Explodes the questions into multiple questions with different sources based on the mode.
+        7. Constructs a new state with the transformed questions and their respective sources.
+    """
     decomposition_chain = make_query_decomposition_chain(llm)
+    query_analysis_chain = make_query_analysis_chain(llm)
+    query_translation_chain = make_query_translation_chain(llm)
     def transform_query(state):
         print("---- Transform query ----")
+        auto_mode = state.get("sources_auto", True)
+        sources_input = state.get("sources_input", ROUTING_INDEX["IPx"])
         new_state = {}
         decomposition_output = decomposition_chain.invoke({"input":state["query"]})
         new_state.update(decomposition_output)
         # Query Analysis
         questions = []
         for question in new_state["questions"]:
             question_state = {"question":question}
+            query_analysis_output = query_analysis_chain.invoke({"input":question})
             # TODO WARNING llm should always return smthg
+            # The case when the llm does not return any sources or wrong ouput
+            if not query_analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS","AcclimaTerra", "PCAET","Biodiv"] for source in query_analysis_output["sources"]):
+                query_analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
+            sources_types = group_by_sources_types(query_analysis_output["sources"])
+            for source_type,sources in sources_types.items():
+                question_state = {
+                    "question":question,
+                    "sources":sources,
+                    "source_type":source_type
+                }
+                questions.append(question_state)
+        # Translate question into the document language
+        for q in questions:
+            if q["source_type"]=="IPx":
+                translation_output = query_translation_chain.invoke({"input":q["question"],"language":"English"})
+                q["question"] = translation_output["question"]
+            elif q["source_type"]=="POC":
+                translation_output = query_translation_chain.invoke({"input":q["question"],"language":"French"})
+                q["question"] = translation_output["question"]
         # Explode the questions into multiple questions with different sources
         new_questions = []
         for q in questions:
+            question,sources,source_type = q["question"],q["sources"], q["source_type"]
             # If not auto mode we take the configuration
             if not auto_mode:
             for index,index_sources in ROUTING_INDEX.items():
                 selected_sources = list(set(sources).intersection(index_sources))
                 if len(selected_sources) > 0:
+                    new_questions.append({"question":question,"sources":selected_sources,"index":index, "source_type":source_type})
         # # Add the number of questions to search
         # k_by_question = k_final // len(new_questions)
         # new_state["questions"] = new_questions
         # new_state["remaining_questions"] = new_questions
+        n_questions = {
+            "total":len(new_questions),
+            "IPx":len([q for q in new_questions if q["index"] == "IPx"]),
+            "POC":len([q for q in new_questions if q["index"] == "POC"]),
+        }
         new_state = {
+            "questions_list":new_questions,
+            "n_questions":n_questions,
+            "handled_questions_index":[],
         }
         return new_state

climateqa/engine/chains/retrieve_documents.py CHANGED Viewed

@@ -7,7 +7,7 @@ from langchain_core.runnables import chain
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.runnables import RunnableLambda
-from ..reranker import rerank_docs
 # from ...knowledge.retriever import ClimateQARetriever
 from ...knowledge.openalex import OpenAlexRetriever
 from .keywords_extraction import make_keywords_extraction_chain
@@ -15,7 +15,9 @@ from ..utils import log_event
 from langchain_core.vectorstores import VectorStore
 from typing import List
 from langchain_core.documents.base import Document
 def divide_into_parts(target, parts):
@@ -87,7 +89,7 @@ def _get_k_images_by_question(n_questions):
     elif n_questions == 2:
         return 5
     elif n_questions == 3:
-        return 2
     else:
         return 1
@@ -98,11 +100,77 @@ def _add_metadata_and_score(docs: List) -> Document:
         doc.page_content = doc.page_content.replace("\r\n"," ")
         doc.metadata["similarity_score"] = score
         doc.metadata["content"] = doc.page_content
-        doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
         # doc.page_content = f"""Doc {i+1} - {doc.metadata['short_name']}: {doc.page_content}"""
         docs_with_metadata.append(doc)
     return docs_with_metadata
 async def get_IPCC_relevant_documents(
     query: str,
     vectorstore:VectorStore,
@@ -164,8 +232,7 @@ async def get_IPCC_relevant_documents(
             "chunk_type":"text",
             "report_type": { "$nin":["SPM"]},
         }
-        k_full = k_total - len(docs_summaries)
-        docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_full)
         if search_figures:
             # Images
@@ -188,15 +255,45 @@ async def get_IPCC_relevant_documents(
     }
 # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
 # @chain
-async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5, k_images=5):
     """
-    Retrieve and rerank documents based on the current question in the state.
     Args:
         state (dict): The current state containing documents, related content, relevant content sources, remaining questions and n_questions.
         config (dict): Configuration settings for logging and other purposes.
         vectorstore (object): The vector store used to retrieve relevant documents.
         reranker (object): The reranker used to rerank the retrieved documents.
@@ -209,95 +306,160 @@ async def retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_qu
     Returns:
         dict: The updated state containing the retrieved and reranked documents, related content, and remaining questions.
     """
-    print("---- Retrieve documents ----")
-    # Get the documents from the state
-    if "documents" in state and state["documents"] is not None:
-        docs = state["documents"]
-    else:
-        docs = []
-    # Get the related_content from the state
-    if "related_content" in state and state["related_content"] is not None:
-        related_content = state["related_content"]
-    else:
-        related_content = []
-    search_figures = "IPCC figures" in state["relevant_content_sources"]
-    search_only = state["search_only"]
-    # Get the current question
-    current_question = state["remaining_questions"][0]
-    remaining_questions = state["remaining_questions"][1:]
-    k_by_question = k_final // state["n_questions"]
-    k_summary_by_question = _get_k_summary_by_question(state["n_questions"])
-    k_images_by_question = _get_k_images_by_question(state["n_questions"])
     sources = current_question["sources"]
     question = current_question["question"]
     index = current_question["index"]
     print(f"Retrieve documents for question: {question}")
     await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)
-    if index == "Vector": # always true for now
         docs_question_dict = await get_IPCC_relevant_documents(
             query  = question,
             vectorstore=vectorstore,
             search_figures = search_figures,
             sources = sources,
             min_size = 200,
-            k_summary = k_summary_by_question,
             k_total = k_before_reranking,
             k_images = k_images_by_question,
             threshold = 0.5,
             search_only = search_only,
         )
     # Rerank
-    if reranker is not None:
         with suppress_output():
-            docs_question_summary_reranked = rerank_docs(reranker,docs_question_dict["docs_summaries"],question)
-            docs_question_fulltext_reranked = rerank_docs(reranker,docs_question_dict["docs_full"],question)
-            docs_question_images_reranked = rerank_docs(reranker,docs_question_dict["docs_images"],question)
-            if rerank_by_question:
-                docs_question_summary_reranked = sorted(docs_question_summary_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_fulltext_reranked = sorted(docs_question_fulltext_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
-                docs_question_images_reranked = sorted(docs_question_images_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
     else:
-        docs_question = docs_question_dict["docs_summaries"] + docs_question_dict["docs_full"]
         # Add a default reranking score
         for doc in docs_question:
             doc.metadata["reranking_score"] = doc.metadata["similarity_score"]
-    docs_question = docs_question_summary_reranked + docs_question_fulltext_reranked
-    docs_question = docs_question[:k_by_question]
-    images_question = docs_question_images_reranked[:k_images]
     if reranker is not None and rerank_by_question:
-        docs_question = sorted(docs_question, key=lambda x: x.metadata["reranking_score"], reverse=True)
     # Add sources used in the metadata
     docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
     images_question = _add_sources_used_in_metadata(images_question,sources,question,index)
-    # Add to the list of docs
-    docs.extend(docs_question)
-    related_content.extend(images_question)
-    new_state = {"documents":docs, "related_contents": related_content,"remaining_questions":remaining_questions}
     return new_state
-def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
-    @chain
-    async def retrieve_docs(state, config):
-        state =  await retrieve_documents(state,config, vectorstore,reranker,llm,rerank_by_question, k_final, k_before_reranking, k_summary)
         return state
-    return retrieve_docs

 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.runnables import RunnableLambda
+from ..reranker import rerank_docs, rerank_and_sort_docs
 # from ...knowledge.retriever import ClimateQARetriever
 from ...knowledge.openalex import OpenAlexRetriever
 from .keywords_extraction import make_keywords_extraction_chain
 from langchain_core.vectorstores import VectorStore
 from typing import List
 from langchain_core.documents.base import Document
+import asyncio
+from typing import Any, Dict, List, Tuple
 def divide_into_parts(target, parts):
     elif n_questions == 2:
         return 5
     elif n_questions == 3:
+        return 3
     else:
         return 1
         doc.page_content = doc.page_content.replace("\r\n"," ")
         doc.metadata["similarity_score"] = score
         doc.metadata["content"] = doc.page_content
+        if doc.metadata["page_number"] != "N/A":
+            doc.metadata["page_number"] = int(doc.metadata["page_number"]) + 1
+        else:
+            doc.metadata["page_number"] = 1
         # doc.page_content = f"""Doc {i+1} - {doc.metadata['short_name']}: {doc.page_content}"""
         docs_with_metadata.append(doc)
     return docs_with_metadata
def remove_duplicates_chunks(docs):
    """Drop chunks whose text is an exact duplicate of a better-scored chunk.

    Args:
        docs: List of ``(document, score)`` pairs; each document exposes
            ``page_content``.

    Returns:
        list: The pairs sorted by descending score, keeping only the first
        (i.e. highest-scored) pair for each distinct ``page_content``.
    """
    ranked = sorted(docs, key=lambda pair: pair[1], reverse=True)

    # dicts keep insertion order, so the first pair seen for a text wins
    best_by_content = {}
    for pair in ranked:
        best_by_content.setdefault(pair[0].page_content, pair)
    return list(best_by_content.values())
async def get_POC_relevant_documents(
    query: str,
    vectorstore:VectorStore,
    sources:list = None,
    search_figures:bool = False,
    search_only:bool = False,
    k_documents:int = 10,
    threshold:float = 0.6,
    k_images: int = 5,
    reports:list = None,
    min_size:int = 200,
) :
    """Retrieve text chunks (and optionally images) from the POC vector store.

    Args:
        query: The question used for the similarity search.
        vectorstore: Store exposing ``similarity_search_with_score``.
        sources: Source names to restrict the search to. Not applied yet
            (see the TODO below). Defaults to the three POC sources.
        search_figures: When True, also search chunks of type ``"image"``.
        search_only: Currently unused by this function.
        k_documents: Number of text chunks requested from the store.
        threshold: Text chunks whose score is not strictly greater than this
            value are discarded.
        k_images: Number of image chunks requested from the store.
        reports: Report short names to restrict the search to. Not applied yet
            (see the TODO below). Defaults to no restriction.
        min_size: Text chunks whose content is not longer than this many
            characters are discarded.

    Returns:
        dict: ``{"docs_question": [...], "docs_images": [...]}``, both lists
        processed by ``_add_metadata_and_score``.
    """
    # Avoid shared mutable defaults: materialize them per call
    if sources is None:
        sources = ["Acclimaterra","PCAET","Plan Biodiversite"]
    if reports is None:
        reports = []

    # Prepare base search kwargs
    filters = {}
    docs_images = []

    # TODO add source selection
    # if len(reports) > 0:
    #     filters["short_name"] = {"$in":reports}
    # else:
    #     filters["source"] = { "$in": sources}

    filters_text = {
        **filters,
        "chunk_type":"text",
        # "report_type": {}, # TODO  to be completed to choose the right documents / chapters according to the analysis of the question
    }

    docs_question = vectorstore.similarity_search_with_score(query=query,filter = filters_text,k = k_documents)
    # Remove exact duplicates, keeping the best-scored occurrence
    docs_question = remove_duplicates_chunks(docs_question)
    # Keep only sufficiently relevant chunks (assumes higher score = more similar)
    docs_question = [x for x in docs_question if x[1] > threshold]

    if search_figures:
        # Images
        filters_image = {
            **filters,
            "chunk_type":"image"
        }
        docs_images = vectorstore.similarity_search_with_score(query=query,filter = filters_image,k = k_images)

    docs_question, docs_images = _add_metadata_and_score(docs_question), _add_metadata_and_score(docs_images)

    # Drop chunks that are too short to be informative
    docs_question = [x for x in docs_question if len(x.page_content) > min_size]

    return {
        "docs_question" : docs_question,
        "docs_images" : docs_images
    }
 async def get_IPCC_relevant_documents(
     query: str,
     vectorstore:VectorStore,
             "chunk_type":"text",
             "report_type": { "$nin":["SPM"]},
         }
+        docs_full = vectorstore.similarity_search_with_score(query=query,filter = filters_full,k = k_total)
         if search_figures:
             # Images
     }
def concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question):
    """Select the documents and images to keep for one question.

    Args:
        index: Name of the index the question was routed to (unused here).
        source_type: ``"IPx"`` or ``"POC"``; decides which keys of
            ``docs_question_dict`` are read.
        docs_question_dict: Retrieved documents grouped by kind. For ``"IPx"``
            the keys ``"docs_summaries"`` and ``"docs_full"`` are read, for
            ``"POC"`` the key ``"docs_question"``; ``"docs_images"`` is read
            in both cases.
        k_by_question: Number of documents to keep for the question.
        k_summary_by_question: Number of summary documents placed in front
            (``"IPx"`` only).
        k_images_by_question: Number of images to keep.

    Returns:
        tuple: ``(docs_question, images_question)``.

    Raises:
        ValueError: If ``source_type`` is neither ``"IPx"`` nor ``"POC"``.
    """
    # Keep the right number of documents - The k_summary documents from SPM are placed in front
    if source_type == "IPx":
        # Clamp at zero: a negative bound would slice from the end and keep
        # almost every full-text document instead of none.
        k_full = max(k_by_question - k_summary_by_question, 0)
        docs_question = docs_question_dict["docs_summaries"][:k_summary_by_question] + docs_question_dict["docs_full"][:k_full]
    elif source_type == "POC" :
        docs_question = docs_question_dict["docs_question"][:k_by_question]
    else :
        raise ValueError("source_type should be either IPx or POC")

    images_question = docs_question_dict["docs_images"][:k_images_by_question]

    return docs_question, images_question
 # The chain callback is not necessary, but it propagates the langchain callbacks to the astream_events logger to display intermediate results
 # @chain
async def retrieve_documents(
    current_question: Dict[str, Any],
    config: Dict[str, Any],
    source_type: str,
    vectorstore: VectorStore,
    reranker: Any,
    search_figures: bool = False,
    search_only: bool = False,
    reports: list = None,
    rerank_by_question: bool = True,
    k_images_by_question: int = 5,
    k_before_reranking: int = 100,
    k_by_question: int = 5,
    k_summary_by_question: int = 3
) -> Tuple[List[Document], List[Document]]:
    """
    Retrieve and rerank the documents and images for a single question.

    Args:
        current_question (dict): The question being processed, with the keys
            "question", "sources", "index" and (optionally) "source_type".
        config (dict): Configuration forwarded to `log_event`.
        source_type (str): "IPx" or "POC". Used only when `current_question`
            has no "source_type" key; the value stored on the question wins.
        vectorstore (object): The vector store used to retrieve relevant documents.
        reranker (object): The reranker used to rerank the retrieved documents, or None.
        search_figures (bool): Whether image chunks are searched as well.
        search_only (bool): Forwarded to the retrieval functions.
        reports (list): Report names forwarded to the retrieval functions.
        rerank_by_question (bool): Whether to rerank with `reranker`.
        k_images_by_question (int): Number of images to retrieve and keep.
        k_before_reranking (int): Number of documents retrieved before reranking.
        k_by_question (int): Number of documents kept for the question.
        k_summary_by_question (int): Number of summary documents kept in front ("IPx" only).

    Returns:
        tuple: `(docs_question, images_question)`, both with the sources used
        added to their metadata.

    Raises:
        ValueError: If the source type is neither "IPx" nor "POC".
    """
    reports = [] if reports is None else reports

    sources = current_question["sources"]
    question = current_question["question"]
    index = current_question["index"]
    # The source type recorded on the question takes precedence over the argument
    source_type = current_question.get("source_type", source_type)

    print(f"Retrieve documents for question: {question}")
    await log_event({"question":question,"sources":sources,"index":index},"log_retriever",config)

    print(f"""---- Retrieve documents from {source_type}----""")

    if source_type == "IPx":
        docs_question_dict = await get_IPCC_relevant_documents(
            query  = question,
            vectorstore=vectorstore,
            search_figures = search_figures,
            sources = sources,
            min_size = 200,
            k_summary = k_before_reranking-1,
            k_total = k_before_reranking,
            k_images = k_images_by_question,
            threshold = 0.5,
            search_only = search_only,
            reports = reports,
        )
    elif source_type == "POC":
        docs_question_dict = await get_POC_relevant_documents(
            query = question,
            vectorstore=vectorstore,
            search_figures = search_figures,
            sources = sources,
            threshold = 0.5,
            search_only = search_only,
            reports = reports,
            min_size= 200,
            k_documents= k_before_reranking,
            # Images are sized by the image budget, as in the IPx branch
            k_images= k_images_by_question
        )
    else:
        # Fail with an explicit message instead of an unbound-variable error below
        raise ValueError(f"source_type should be either IPx or POC, got {source_type!r}")

    # Rerank
    if reranker is not None and rerank_by_question:
        with suppress_output():
            for key in list(docs_question_dict.keys()):
                docs_question_dict[key] = rerank_and_sort_docs(reranker,docs_question_dict[key],question)
    else:
        # Add a default reranking score to every retrieved document
        for docs_group in docs_question_dict.values():
            for doc in docs_group:
                if "similarity_score" in doc.metadata:
                    doc.metadata["reranking_score"] = doc.metadata["similarity_score"]

    # Keep the right number of documents
    docs_question, images_question = concatenate_documents(index, source_type, docs_question_dict, k_by_question, k_summary_by_question, k_images_by_question)

    # Rerank the documents to put the most relevant in front
    if reranker is not None and rerank_by_question:
        docs_question = rerank_and_sort_docs(reranker, docs_question, question)

    # Add sources used in the metadata
    docs_question = _add_sources_used_in_metadata(docs_question,sources,question,index)
    images_question = _add_sources_used_in_metadata(images_question,sources,question,index)

    return docs_question, images_question
async def retrieve_documents_for_all_questions(state, config, source_type, to_handle_questions_index, vectorstore, reranker, rerank_by_question=True, k_final=15, k_before_reranking=100):
    """
    Retrieve documents in parallel for all the selected questions.

    Args:
        state (dict): Graph state. Reads "questions_list" and, when there is at
            least one question to handle, "relevant_content_sources_selection",
            "search_only", "reports" and "n_questions" (its "total" entry).
        config (dict): Configuration forwarded to `retrieve_documents`.
        source_type (str): Source type forwarded to `retrieve_documents`.
        to_handle_questions_index (list): Positions in `state["questions_list"]`
            of the questions to process.
        vectorstore (object): The vector store used to retrieve relevant documents.
        reranker (object): The reranker forwarded to `retrieve_documents`.
        rerank_by_question (bool): Forwarded to `retrieve_documents`.
        k_final (int): Total document budget, split evenly across all questions.
        k_before_reranking (int): Number of documents retrieved before reranking.

    Returns:
        dict: State update with the keys "documents", "related_contents" and
        "handled_questions_index".
    """
    # TODO split the questions by source type in the state + add conditions on the number of questions handled per source type
    new_state = {"documents": [], "related_contents": [], "handled_questions_index": to_handle_questions_index}

    wanted_positions = set(to_handle_questions_index)
    selected_questions = [
        question
        for i, question in enumerate(state["questions_list"])
        if i in wanted_positions
    ]
    # Nothing to retrieve: skip the budget computation (which would divide by
    # zero when the state holds no question at all).
    if not selected_questions:
        return new_state

    search_figures = "Figures (IPCC/IPBES)" in state["relevant_content_sources_selection"]
    search_only = state["search_only"]
    reports = state["reports"]

    n_total = state["n_questions"]["total"]
    k_by_question = k_final // max(n_total, 1)
    k_summary_by_question = _get_k_summary_by_question(n_total)
    k_images_by_question = _get_k_images_by_question(n_total)

    tasks = [
        retrieve_documents(
            current_question=question,
            config=config,
            source_type=source_type,
            vectorstore=vectorstore,
            reranker=reranker,
            search_figures=search_figures,
            search_only=search_only,
            reports=reports,
            rerank_by_question=rerank_by_question,
            k_images_by_question=k_images_by_question,
            k_before_reranking=k_before_reranking,
            k_by_question=k_by_question,
            k_summary_by_question=k_summary_by_question
        )
        for question in selected_questions
    ]
    results = await asyncio.gather(*tasks)

    # Combine results
    for docs_question, images_question in results:
        new_state["documents"].extend(docs_question)
        new_state["related_contents"].extend(images_question)
    return new_state
def make_IPx_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
    """Build the graph node that retrieves documents for the "IPx" questions.

    `llm` and `k_summary` are accepted but not used by the node.

    Returns:
        An async function `(state, config)` that returns the state update
        produced by `retrieve_documents_for_all_questions`.
    """

    async def retrieve_IPx_docs(state, config):
        # Positions of the questions routed to the IPx sources
        positions = []
        for position, item in enumerate(state["questions_list"]):
            if item["source_type"] == "IPx":
                positions.append(position)

        return await retrieve_documents_for_all_questions(
            state=state,
            config=config,
            source_type="IPx",
            to_handle_questions_index=positions,
            vectorstore=vectorstore,
            reranker=reranker,
            rerank_by_question=rerank_by_question,
            k_final=k_final,
            k_before_reranking=k_before_reranking,
        )

    return retrieve_IPx_docs
def make_POC_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_final=15, k_before_reranking=100, k_summary=5):
    """Build the graph node that retrieves documents for the "POC" questions.

    `llm` and `k_summary` are accepted but not used by the node.

    Returns:
        An async function `(state, config)`. It returns an empty update when
        "POC region" is not among `state["relevant_content_sources_selection"]`,
        otherwise the state update produced by
        `retrieve_documents_for_all_questions`.
    """

    async def retrieve_POC_docs_node(state, config):
        poc_enabled = "POC region" in state["relevant_content_sources_selection"]
        if not poc_enabled:
            return {}

        # Positions of the questions routed to the POC sources
        positions = []
        for position, item in enumerate(state["questions_list"]):
            if item["source_type"] == "POC":
                positions.append(position)

        return await retrieve_documents_for_all_questions(
            state=state,
            config=config,
            source_type="POC",
            to_handle_questions_index=positions,
            vectorstore=vectorstore,
            reranker=reranker,
            rerank_by_question=rerank_by_question,
            k_final=k_final,
            k_before_reranking=k_before_reranking,
        )

    return retrieve_POC_docs_node

climateqa/engine/chains/retrieve_papers.py CHANGED Viewed

@@ -32,8 +32,8 @@ def generate_keywords(query):
     return keywords
-async def find_papers(query,after, relevant_content_sources, reranker= reranker):
-    if "OpenAlex" in relevant_content_sources:
         summary = ""
         keywords = generate_keywords(query)
         df_works = oa.search(keywords,after = after)

     return keywords
+async def find_papers(query,after, relevant_content_sources_selection, reranker= reranker):
+    if "Papers (OpenAlex)" in relevant_content_sources_selection:
         summary = ""
         keywords = generate_keywords(query)
         df_works = oa.search(keywords,after = after)

climateqa/engine/graph.py CHANGED Viewed

@@ -9,6 +9,9 @@ from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod
 from typing_extensions import TypedDict
 from typing import List, Dict
 from IPython.display import display, HTML, Image
 from .chains.answer_chitchat import make_chitchat_node
@@ -16,7 +19,7 @@ from .chains.answer_ai_impact import make_ai_impact_node
 from .chains.query_transformation import make_query_transform_node
 from .chains.translation import make_translation_node
 from .chains.intent_categorization import make_intent_categorization_node
-from .chains.retrieve_documents import make_retriever_node
 from .chains.answer_rag import make_rag_node
 from .chains.graph_retriever import make_graph_retriever_node
 from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
@@ -31,25 +34,30 @@ class GraphState(TypedDict):
     intent : str
     search_graphs_chitchat : bool
     query: str
-    remaining_questions : List[dict]
     n_questions : int
     answer: str
     audience: str = "experts"
     sources_input: List[str] = ["IPCC","IPBES"]
-    relevant_content_sources: List[str] = ["IPCC figures"]
     sources_auto: bool = True
     min_year: int = 1960
     max_year: int = None
-    documents: List[Document]
-    related_contents : Dict[str,Document]
     recommended_content : List[Document]
     search_only : bool = False
 def search(state): #TODO
-    return state
 def answer_search(state):#TODO
-    return state
 def route_intent(state):
     intent = state["intent"]
@@ -59,7 +67,7 @@ def route_intent(state):
     #     return "answer_ai_impact"
     else:
         # Search route
-        return "search"
 def chitchat_route_intent(state):
     intent = state["search_graphs_chitchat"]
@@ -72,27 +80,74 @@ def route_translation(state):
     if state["language"].lower() == "english":
         return "transform_query"
     else:
-        return "translate_query"
 def route_based_on_relevant_docs(state,threshold_docs=0.2):
     docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
     if len(docs) > 0:
         return "answer_rag"
     else:
         return "answer_rag_no_docs"
-def route_retrieve_documents(state):
-    if state["search_only"] :
-        return END
-    elif len(state["remaining_questions"]) > 0:
         return "retrieve_documents"
     else:
-        return "answer_search"
 def make_id_dict(values):
     return {k:k for k in values}
-def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, threshold_docs=0.2):
     workflow = StateGraph(GraphState)
@@ -102,8 +157,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     translate_query = make_translation_node(llm)
     answer_chitchat = make_chitchat_node(llm)
     answer_ai_impact = make_ai_impact_node(llm)
-    retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
@@ -111,13 +167,14 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     # Define the nodes
     # workflow.add_node("set_defaults", set_defaults)
     workflow.add_node("categorize_intent", categorize_intent)
-    workflow.add_node("search", search)
     workflow.add_node("answer_search", answer_search)
     workflow.add_node("transform_query", transform_query)
     workflow.add_node("translate_query", translate_query)
     workflow.add_node("answer_chitchat", answer_chitchat)
     workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
     workflow.add_node("retrieve_graphs", retrieve_graphs)
     workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
     workflow.add_node("retrieve_documents", retrieve_documents)
     workflow.add_node("answer_rag", answer_rag)
@@ -130,7 +187,7 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_conditional_edges(
         "categorize_intent",
         route_intent,
-        make_id_dict(["answer_chitchat","search"])
     )
     workflow.add_conditional_edges(
@@ -140,15 +197,96 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
     workflow.add_conditional_edges(
-        "search",
         route_translation,
         make_id_dict(["translate_query","transform_query"])
     )
     workflow.add_conditional_edges(
-        "retrieve_documents",
-        # lambda state : "retrieve_documents" if len(state["remaining_questions"]) > 0 else "answer_search",
         route_retrieve_documents,
-        make_id_dict([END,"retrieve_documents","answer_search"])
     )
     workflow.add_conditional_edges(
@@ -158,13 +296,15 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     )
     workflow.add_conditional_edges(
         "transform_query",
-        lambda state : "retrieve_graphs" if "OurWorldInData" in state["relevant_content_sources"]  else END,
         make_id_dict(["retrieve_graphs", END])
     )
     # Define the edges
     workflow.add_edge("translate_query", "transform_query")
-    workflow.add_edge("transform_query", "retrieve_documents")
     workflow.add_edge("retrieve_graphs", END)
     workflow.add_edge("answer_rag", END)
@@ -172,6 +312,8 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
     workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
     workflow.add_edge("retrieve_graphs_chitchat", END)
     # Compile
     app = workflow.compile()

 from typing_extensions import TypedDict
 from typing import List, Dict
+import operator
+from typing import Annotated
 from IPython.display import display, HTML, Image
 from .chains.answer_chitchat import make_chitchat_node
 from .chains.query_transformation import make_query_transform_node
 from .chains.translation import make_translation_node
 from .chains.intent_categorization import make_intent_categorization_node
+from .chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
 from .chains.answer_rag import make_rag_node
 from .chains.graph_retriever import make_graph_retriever_node
 from .chains.chitchat_categorization import make_chitchat_intent_categorization_node
     intent : str
     search_graphs_chitchat : bool
     query: str
+    questions_list : List[dict]
+    handled_questions_index : Annotated[list[int], operator.add]
     n_questions : int
     answer: str
     audience: str = "experts"
     sources_input: List[str] = ["IPCC","IPBES"]
+    relevant_content_sources_selection: List[str] = ["Figures (IPCC/IPBES)"]
     sources_auto: bool = True
     min_year: int = 1960
     max_year: int = None
+    documents: Annotated[List[Document], operator.add]
+    related_contents : Annotated[List[Document], operator.add]
     recommended_content : List[Document]
     search_only : bool = False
+    reports : List[str] = []
+def dummy(state):
+    """Do nothing: ignore `state` and return None."""
+    # NOTE(review): presumably used as a pass-through graph node that emits no state update - confirm.
+    return
 def search(state): #TODO
+    # Placeholder: ignores `state` and returns None.
+    return
 def answer_search(state):#TODO
+    # Placeholder: ignores `state` and returns None.
+    # NOTE(review): presumably returns None rather than `state` so that no state update is emitted - confirm.
+    return
 def route_intent(state):
     intent = state["intent"]
     #     return "answer_ai_impact"
     else:
         # Search route
+        return "answer_climate"
 def chitchat_route_intent(state):
     intent = state["search_graphs_chitchat"]
     if state["language"].lower() == "english":
         return "transform_query"
     else:
+        return "transform_query"
+        # return "translate_query" #TODO : add translation
 def route_based_on_relevant_docs(state,threshold_docs=0.2):
+    """Return "answer_rag" if at least one document scores above `threshold_docs`, else "answer_rag_no_docs"."""
+    # Keep only the documents whose metadata["reranking_score"] is strictly greater than the threshold.
     docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
+    # Debug trace of the route about to be returned (printed as a one-element list).
+    print("Route : ", ["answer_rag" if len(docs) > 0 else "answer_rag_no_docs"])
     if len(docs) > 0:
         return "answer_rag"
     else:
         return "answer_rag_no_docs"
+def route_continue_retrieve_documents(state):
+    """Return "end_retrieve_IPx_documents" once every IPx question is handled, else "retrieve_documents".
+
+    A question counts as handled when its index in state["questions_list"] appears in
+    state["handled_questions_index"]. With no IPx question at all, `all()` is True and
+    the "end" route is returned.
+    """
+    # NOTE(review): no graph visible in this change wires this router in - confirm it is still needed.
+    index_question_ipx = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "IPx"]
+    questions_ipx_finished = all(elem in state["handled_questions_index"] for elem in index_question_ipx)
+    # if questions_ipx_finished and state["search_only"]:
+    #     return END
+    if questions_ipx_finished:
+        return "end_retrieve_IPx_documents"
+    else:
         return "retrieve_documents"
+    # if state["n_questions"]["IPx"] == len(state["handled_questions_index"]) and state["search_only"] :
+    #     return END
+    # elif state["n_questions"]["IPx"] == len(state["handled_questions_index"]):
+    #     return "answer_search"
+    # else :
+    #     return "retrieve_documents"
+def route_continue_retrieve_local_documents(state):
+    """Return "end_retrieve_local_documents" when POC retrieval is done or disabled, else "retrieve_local_data".
+
+    Retrieval is done when the index of every POC question in state["questions_list"] appears in
+    state["handled_questions_index"]; it is disabled when "POC region" is missing from
+    state["relevant_content_sources_selection"].
+    """
+    # NOTE(review): no graph visible in this change wires this router in - confirm it is still needed.
+    index_question_poc = [i for i, x in enumerate(state["questions_list"]) if x["source_type"] == "POC"]
+    questions_poc_finished = all(elem in state["handled_questions_index"] for elem in index_question_poc)
+    # if questions_poc_finished and state["search_only"]:
+    #     return END
+    if questions_poc_finished or ("POC region" not in state["relevant_content_sources_selection"]):
+        return "end_retrieve_local_documents"
     else:
+        return "retrieve_local_data"
+    # if state["n_questions"]["POC"] == len(state["handled_questions_index"]) and state["search_only"] :
+    #     return END
+    # elif state["n_questions"]["POC"] == len(state["handled_questions_index"]):
+    #     return "answer_search"
+    # else :
+    #     return "retrieve_local_data"
+    # if len(state["remaining_questions"]) == 0 and state["search_only"] :
+        # return END
+    # elif len(state["remaining_questions"]) > 0:
+    #     return "retrieve_documents"
+    # else:
+    #     return "answer_search"
+def route_retrieve_documents(state):
+    """Return the list of retrieval nodes to run, or END when there is none.
+
+    The only node currently offered is "retrieve_graphs", chosen when
+    "Graphs (OurWorldInData)" is in state["relevant_content_sources_selection"].
+    """
+    selection = state["relevant_content_sources_selection"]
+    targets = ["retrieve_graphs"] if "Graphs (OurWorldInData)" in selection else []
+    return targets if targets else END
 def make_id_dict(values):
+    """Return a dict mapping every item of `values` to itself."""
     return {k:k for k in values}
+def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
     workflow = StateGraph(GraphState)
     translate_query = make_translation_node(llm)
     answer_chitchat = make_chitchat_node(llm)
     answer_ai_impact = make_ai_impact_node(llm)
+    retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
     retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
+    # retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
     answer_rag = make_rag_node(llm, with_docs=True)
     answer_rag_no_docs = make_rag_node(llm, with_docs=False)
     chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
     # Define the nodes
     # workflow.add_node("set_defaults", set_defaults)
     workflow.add_node("categorize_intent", categorize_intent)
+    workflow.add_node("answer_climate", dummy)
     workflow.add_node("answer_search", answer_search)
     workflow.add_node("transform_query", transform_query)
     workflow.add_node("translate_query", translate_query)
     workflow.add_node("answer_chitchat", answer_chitchat)
     workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
     workflow.add_node("retrieve_graphs", retrieve_graphs)
+    # workflow.add_node("retrieve_local_data", retrieve_local_data)
     workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
     workflow.add_node("retrieve_documents", retrieve_documents)
     workflow.add_node("answer_rag", answer_rag)
     workflow.add_conditional_edges(
         "categorize_intent",
         route_intent,
+        make_id_dict(["answer_chitchat","answer_climate"])
     )
     workflow.add_conditional_edges(
     )
     workflow.add_conditional_edges(
+        "answer_climate",
         route_translation,
         make_id_dict(["translate_query","transform_query"])
     )
+    workflow.add_conditional_edges(
+        "answer_search",
+        lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
+        make_id_dict(["answer_rag","answer_rag_no_docs"])
+    )
     workflow.add_conditional_edges(
+        "transform_query",
         route_retrieve_documents,
+        make_id_dict(["retrieve_graphs", END])
+    )
+    # Define the edges
+    workflow.add_edge("translate_query", "transform_query")
+    workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
+    # workflow.add_edge("transform_query", "retrieve_local_data")
+    # workflow.add_edge("transform_query", END) # TODO remove
+    workflow.add_edge("retrieve_graphs", END)
+    workflow.add_edge("answer_rag", END)
+    workflow.add_edge("answer_rag_no_docs", END)
+    workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
+    workflow.add_edge("retrieve_graphs_chitchat", END)
+    # workflow.add_edge("retrieve_local_data", "answer_search")
+    workflow.add_edge("retrieve_documents", "answer_search")
+    # Compile
+    app = workflow.compile()
+    return app
+def make_graph_agent_poc(llm, vectorstore_ipcc, vectorstore_graphs, vectorstore_region, reranker, threshold_docs=0.2):
+    workflow = StateGraph(GraphState)
+    # Define the node functions
+    categorize_intent = make_intent_categorization_node(llm)
+    transform_query = make_query_transform_node(llm)
+    translate_query = make_translation_node(llm)
+    answer_chitchat = make_chitchat_node(llm)
+    answer_ai_impact = make_ai_impact_node(llm)
+    retrieve_documents = make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)
+    retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
+    retrieve_local_data = make_POC_retriever_node(vectorstore_region, reranker, llm)
+    answer_rag = make_rag_node(llm, with_docs=True)
+    answer_rag_no_docs = make_rag_node(llm, with_docs=False)
+    chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
+    # Define the nodes
+    # workflow.add_node("set_defaults", set_defaults)
+    workflow.add_node("categorize_intent", categorize_intent)
+    workflow.add_node("answer_climate", dummy)
+    workflow.add_node("answer_search", answer_search)
+    # workflow.add_node("end_retrieve_local_documents", dummy)
+    # workflow.add_node("end_retrieve_IPx_documents", dummy)
+    workflow.add_node("transform_query", transform_query)
+    workflow.add_node("translate_query", translate_query)
+    workflow.add_node("answer_chitchat", answer_chitchat)
+    workflow.add_node("chitchat_categorize_intent", chitchat_categorize_intent)
+    workflow.add_node("retrieve_graphs", retrieve_graphs)
+    workflow.add_node("retrieve_local_data", retrieve_local_data)
+    workflow.add_node("retrieve_graphs_chitchat", retrieve_graphs)
+    workflow.add_node("retrieve_documents", retrieve_documents)
+    workflow.add_node("answer_rag", answer_rag)
+    workflow.add_node("answer_rag_no_docs", answer_rag_no_docs)
+    # Entry point
+    workflow.set_entry_point("categorize_intent")
+    # CONDITIONAL EDGES
+    workflow.add_conditional_edges(
+        "categorize_intent",
+        route_intent,
+        make_id_dict(["answer_chitchat","answer_climate"])
+    )
+    workflow.add_conditional_edges(
+        "chitchat_categorize_intent",
+        chitchat_route_intent,
+        make_id_dict(["retrieve_graphs_chitchat", END])
+    )
+    workflow.add_conditional_edges(
+        "answer_climate",
+        route_translation,
+        make_id_dict(["translate_query","transform_query"])
     )
     workflow.add_conditional_edges(
     )
     workflow.add_conditional_edges(
         "transform_query",
+        route_retrieve_documents,
         make_id_dict(["retrieve_graphs", END])
     )
     # Define the edges
     workflow.add_edge("translate_query", "transform_query")
+    workflow.add_edge("transform_query", "retrieve_documents") #TODO put back
+    workflow.add_edge("transform_query", "retrieve_local_data")
+    # workflow.add_edge("transform_query", END) # TODO remove
     workflow.add_edge("retrieve_graphs", END)
     workflow.add_edge("answer_rag", END)
     workflow.add_edge("answer_chitchat", "chitchat_categorize_intent")
     workflow.add_edge("retrieve_graphs_chitchat", END)
+    workflow.add_edge("retrieve_local_data", "answer_search")
+    workflow.add_edge("retrieve_documents", "answer_search")
     # Compile
     app = workflow.compile()

climateqa/engine/reranker.py CHANGED Viewed

@@ -47,4 +47,9 @@ def rerank_docs(reranker,docs,query):
         doc.metadata["reranking_score"] = result.score
         doc.metadata["query_used_for_retrieval"] = query
         docs_reranked.append(doc)
     return docs_reranked

         doc.metadata["reranking_score"] = result.score
         doc.metadata["query_used_for_retrieval"] = query
         docs_reranked.append(doc)
+    return docs_reranked
+def rerank_and_sort_docs(reranker, docs, query):
+    """Rerank `docs` with `rerank_docs`, then return them sorted by descending metadata["reranking_score"]."""
+    docs_reranked = rerank_docs(reranker,docs,query)
+    # Highest reranking score first.
+    docs_reranked = sorted(docs_reranked, key=lambda x: x.metadata["reranking_score"], reverse=True)
     return docs_reranked

climateqa/{event_handler.py → handle_stream_events.py} RENAMED Viewed

@@ -15,7 +15,14 @@ def init_audience(audience :str) -> str:
         audience_prompt = audience_prompts["experts"]
     return audience_prompt
-def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str]) -> tuple[str, list[ChatMessage], list[str]]:
     """
     Handles the retrieved documents and returns the HTML representation of the documents
@@ -27,26 +34,22 @@ def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage],
     Returns:
         tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
     """
     try:
-        docs = event["data"]["output"]["documents"]
-        docs_html = []
-        textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
-        for i, d in enumerate(textual_docs, 1):
-            if d.metadata["chunk_type"] == "text":
-                docs_html.append(make_html_source(d, i))
         used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
         if used_documents!=[]:
             history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
-        docs_html = "".join(docs_html)
-        related_contents = event["data"]["output"]["related_contents"]
     except Exception as e:
         print(f"Error getting documents: {e}")
         print(event)
-    return docs, docs_html, history, used_documents, related_contents
 def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str)-> tuple[list[ChatMessage], bool, str]:
     """

         audience_prompt = audience_prompts["experts"]
     return audience_prompt
+def convert_to_docs_to_html(docs: list[dict]) -> str:
+    """Render the text chunks of `docs` as one concatenated HTML string.
+
+    Documents are numbered from 1 by their position in `docs`. Those whose
+    metadata["chunk_type"] is not "text" are left out but still use up a number.
+    """
+    return "".join(
+        make_html_source(document, rank)
+        for rank, document in enumerate(docs, 1)
+        if document.metadata["chunk_type"] == "text"
+    )
+def handle_retrieved_documents(event: StreamEvent, history : list[ChatMessage], used_documents : list[str],related_content:list[str]) -> tuple[str, list[ChatMessage], list[str]]:
     """
     Handles the retrieved documents and returns the HTML representation of the documents
     Returns:
         tuple[str, list[ChatMessage], list[str]]: The updated HTML representation of the documents, the updated message history and the updated list of used documents
     """
+    if "documents" not in event["data"]["output"] or event["data"]["output"]["documents"] == []:
+        return history, used_documents, related_content
     try:
+        docs = event["data"]["output"]["documents"]
         used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
         if used_documents!=[]:
             history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
+        #TODO do the same for related contents
     except Exception as e:
         print(f"Error getting documents: {e}")
         print(event)
+    return history, used_documents, related_content
 def stream_answer(history: list[ChatMessage], event : StreamEvent, start_streaming : bool, answer_message_content : str)-> tuple[list[ChatMessage], bool, str]:
     """

front/deprecated.py ADDED Viewed

	@@ -0,0 +1,46 @@

+# Functions to toggle visibility
+def toggle_summary_visibility():
+    """Flip the module-level `summary_visible` flag and return a `gr.update` carrying the new visibility."""
+    # NOTE(review): neither `gr` nor `summary_visible` is defined in the visible part of this file.
+    global summary_visible
+    summary_visible = not summary_visible
+    return gr.update(visible=summary_visible)
+def toggle_relevant_visibility():
+    """Flip the module-level `relevant_visible` flag and return a `gr.update` carrying the new visibility."""
+    # NOTE(review): neither `gr` nor `relevant_visible` is defined in the visible part of this file.
+    global relevant_visible
+    relevant_visible = not relevant_visible
+    return gr.update(visible=relevant_visible)
+def change_completion_status(current_state):
+    """Return `1 - current_state`, which swaps 0 and 1."""
+    return 1 - current_state
+def vote(data: gr.LikeData):
+    """Print `data.value` when `data.liked` is truthy, otherwise print `data` itself."""
+    print(data.value if data.liked else data)
+def save_graph(saved_graphs_state, embedding, category):
+    """Record `embedding` under `category` in `saved_graphs_state`, skipping duplicates.
+
+    Mutates `saved_graphs_state` in place and returns it together with a
+    `gr.Button` labelled "Graph Saved".
+    """
+    print(f"\nCategory:\n{saved_graphs_state}\n")
+    # Create the list for this category on first use, then append only unseen embeddings.
+    bucket = saved_graphs_state.setdefault(category, [])
+    if embedding not in bucket:
+        bucket.append(embedding)
+    return saved_graphs_state, gr.Button("Graph Saved")
+# Function to save feedback
+def save_feedback(feed: str, user_id):
+    """Send `feed` to `log_on_azure` and return a confirmation message.
+
+    Feedback of length 0 or 1 is ignored, in which case the function returns None.
+    """
+    if len(feed) <= 1:
+        return None
+    timestamp = str(datetime.now().timestamp())
+    # The log file name is the user id followed by the timestamp.
+    filename = user_id + timestamp + ".json"
+    payload = {"user_id": user_id, "feedback": feed, "time": timestamp}
+    log_on_azure(filename, payload, share_client)
+    return "Feedback submitted, thank you!"

front/event_listeners.py ADDED Viewed

File without changes

front/tabs/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from .tab_config import create_config_modal
+from .tab_examples import create_examples_tab
+from .tab_papers import create_papers_tab
+from .tab_figures import create_figures_tab
+from .chat_interface import create_chat_interface
+from .tab_about import create_about_tab

front/tabs/chat_interface.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import gradio as gr
+from gradio.components import ChatMessage
+# Greeting shown as the assistant's first message in the chat
+init_prompt = """
+Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
+❓ How to use
+- **Language**: You can ask me your questions in any language.
+- **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
+- **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
+- **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.
+⚠️ Limitations
+*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
+🛈 Information
+Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
+What do you want to learn ?
+"""
+# UI Layout Components
+def create_chat_interface():
+    """Create the chat widgets and return them as `(chatbot, textbox, config_button)`."""
+    # The conversation opens with `init_prompt` as a single assistant message.
+    greeting = ChatMessage(role="assistant", content=init_prompt)
+    chatbot_options = dict(
+        type="messages",
+        show_copy_button=True,
+        show_label=False,
+        elem_id="chatbot",
+        layout="panel",
+        avatar_images=(None, "https://i.ibb.co/YNyd5W2/logo4.png"),
+        max_height="80vh",
+        height="100vh",
+    )
+    chatbot = gr.Chatbot(value=[greeting], **chatbot_options)
+    textbox_options = dict(
+        placeholder="Ask me anything here!",
+        show_label=False,
+        scale=12,
+        lines=1,
+        interactive=True,
+        elem_id="input-textbox",
+    )
+    # The input box and the config button are created inside the same row.
+    with gr.Row(elem_id="input-message"):
+        textbox = gr.Textbox(**textbox_options)
+        config_button = gr.Button("", elem_id="config-button")
+    return chatbot, textbox, config_button

front/tabs/main_tab.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import gradio as gr
+from .chat_interface import create_chat_interface
+from .tab_examples import create_examples_tab
+from .tab_papers import create_papers_tab
+from .tab_figures import create_figures_tab
+from .chat_interface import create_chat_interface
+def cqa_tab(tab_name):
+    """Build one chat tab labelled `tab_name` and return its components in a dict.
+
+    The tab has the chat interface in the left column and, in the right column,
+    the "Examples", "Sources" and "Recommended content" tabs (the latter holding
+    the "Figures", "Papers" and "Graphs" sub-tabs).
+
+    `used_figures`, `figure_modal`, `papers_modal` and `tabs_recommended_content`
+    are created here but are not part of the returned dict. The graphs HTML
+    component is returned under the key "graph_container".
+    """
+    # State variables
+    current_graphs = gr.State([])
+    with gr.Tab(tab_name):
+        with gr.Row(elem_id="chatbot-row"):
+            # Left column - Chat interface
+            with gr.Column(scale=2):
+                chatbot, textbox, config_button = create_chat_interface()
+            # Right column - Content panels
+            with gr.Column(scale=2, variant="panel", elem_id="right-panel"):
+                with gr.Tabs(elem_id="right_panel_tab") as tabs:
+                    # Examples tab
+                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
+                        examples_hidden, dropdown_samples, samples = create_examples_tab()
+                    # Sources tab
+                    with gr.Tab("Sources", elem_id="tab-sources", id=1) as tab_sources:
+                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
+                    # Recommended content tab
+                    with gr.Tab("Recommended content", elem_id="tab-recommended_content", id=2) as tab_recommended_content:
+                        with gr.Tabs(elem_id="group-subtabs") as tabs_recommended_content:
+                            # Figures subtab
+                            with gr.Tab("Figures", elem_id="tab-figures", id=3) as tab_figures:
+                                sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal = create_figures_tab()
+                            # Papers subtab
+                            with gr.Tab("Papers", elem_id="tab-citations", id=4) as tab_papers:
+                                papers_summary, papers_html, citations_network, papers_modal = create_papers_tab()
+                            # Graphs subtab
+                            with gr.Tab("Graphs", elem_id="tab-graphs", id=5) as tab_graphs:
+                                graphs_container = gr.HTML(
+                                    "<h2>There are no graphs to be displayed at the moment. Try asking another question.</h2>",
+                                    elem_id="graphs-container"
+                                )
+    return {
+        "chatbot": chatbot,
+        "textbox": textbox,
+        "tabs": tabs,
+        "sources_raw": sources_raw,
+        "new_figures": new_figures,
+        "current_graphs": current_graphs,
+        "examples_hidden": examples_hidden,
+        "dropdown_samples": dropdown_samples,
+        "samples": samples,
+        "sources_textbox": sources_textbox,
+        "figures_cards": figures_cards,
+        "gallery_component": gallery_component,
+        "config_button": config_button,
+        "papers_html": papers_html,
+        "citations_network": citations_network,
+        "papers_summary": papers_summary,
+        "tab_recommended_content": tab_recommended_content,
+        "tab_sources": tab_sources,
+        "tab_figures": tab_figures,
+        "tab_graphs": tab_graphs,
+        "tab_papers": tab_papers,
+        "graph_container": graphs_container
+    }

front/tabs/tab_about.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import gradio as gr
+# Citation information
+CITATION_LABEL = "BibTeX citation for ClimateQ&A"
+# BibTeX separates the people of an author list with " and "; a comma inside a
+# name is read as "Last, First", so a comma-joined list is parsed as one person.
+# NOTE(review): both entries share the citation key `climateqa`, which clashes
+# if both are pasted into the same .bib file - confirm whether that is intended.
+CITATION_TEXT = r"""@misc{climateqa,
+    author={Théo Alves Da Costa and Timothée Bohe},
+    title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
+    year={2024},
+    howpublished= {\url{https://climateqa.com}},
+}
+@software{climateqa,
+    author = {Théo Alves Da Costa and Timothée Bohe},
+    publisher = {ClimateQ&A},
+    title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
+}
+"""
+def create_about_tab():
+    """Build the "About" tab: a Markdown block of links followed by a collapsed citation accordion."""
+    with gr.Tab("About", elem_classes="max-height other-tabs"):
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown(
+                    """
+                    ### More info
+                    - See more info at [https://climateqa.com](https://climateqa.com/docs/intro/)
+                    - Feedbacks on this [form](https://forms.office.com/e/1Yzgxm6jbp)
+                    ### Citation
+                    """
+                )
+                with gr.Accordion(CITATION_LABEL, elem_id="citation", open=False):
+                    # Read-only textbox sized to one row per line of the citation text.
+                    gr.Textbox(
+                        value=CITATION_TEXT,
+                        label="",
+                        interactive=False,
+                        show_copy_button=True,
+                        lines=len(CITATION_TEXT.split('\n')),
+                    )

front/tabs/tab_config.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import gradio as gr
+from gradio_modal import Modal
+from climateqa.constants import POSSIBLE_REPORTS
+from typing import TypedDict
+class ConfigPanel(TypedDict):
+    """Typed mapping of the Gradio components that make up the configuration modal."""
+    # NOTE(review): the only construction of ConfigPanel visible in this file is commented out;
+    # create_config_modal returns a plain dict instead - confirm whether this type is still used.
+    config_open: gr.State
+    config_modal: Modal
+    dropdown_sources: gr.CheckboxGroup
+    dropdown_reports: gr.Dropdown
+    dropdown_external_sources: gr.CheckboxGroup
+    search_only: gr.Checkbox
+    dropdown_audience: gr.Dropdown
+    after: gr.Slider
+    output_query: gr.Textbox
+    output_language: gr.Textbox
+def create_config_modal():
+    config_open = gr.State(value=True)
+    with Modal(visible=False, elem_id="modal-config") as config_modal:
+        gr.Markdown("Reminders: You can talk in any language, ClimateQ&A is multi-lingual!")
+        dropdown_sources = gr.CheckboxGroup(
+            choices=["IPCC", "IPBES", "IPOS"],
+            label="Select source (by default search in all sources)",
+            value=["IPCC"],
+            interactive=True
+        )
+        dropdown_reports = gr.Dropdown(
+            choices=POSSIBLE_REPORTS,
+            label="Or select specific reports",
+            multiselect=True,
+            value=None,
+            interactive=True
+        )
+        dropdown_external_sources = gr.CheckboxGroup(
+            choices=["Figures (IPCC/IPBES)", "Papers (OpenAlex)", "Graphs (OurWorldInData)","POC region"],
+            label="Select database to search for relevant content",
+            value=["Figures (IPCC/IPBES)","POC region"],
+            interactive=True
+        )
+        search_only = gr.Checkbox(
+            label="Search only for recommended content without chating",
+            value=False,
+            interactive=True,
+            elem_id="checkbox-chat"
+        )
+        dropdown_audience = gr.Dropdown(
+            choices=["Children", "General public", "Experts"],
+            label="Select audience",
+            value="Experts",
+            interactive=True
+        )
+        after = gr.Slider(
+            minimum=1950,
+            maximum=2023,
+            step=1,
+            value=1960,
+            label="Publication date",
+            show_label=True,
+            interactive=True,
+            elem_id="date-papers",
+            visible=False
+        )
+        output_query = gr.Textbox(
+            label="Query used for retrieval",
+            show_label=True,
+            elem_id="reformulated-query",
+            lines=2,
+            interactive=False,
+            visible=False
+        )
+        output_language = gr.Textbox(
+            label="Language",
+            show_label=True,
+            elem_id="language",
+            lines=1,
+            interactive=False,
+            visible=False
+        )
+        dropdown_external_sources.change(
+            lambda x: gr.update(visible="Papers (OpenAlex)" in x),
+            inputs=[dropdown_external_sources],
+            outputs=[after]
+        )
+        close_config_modal_button = gr.Button("Validate and Close", elem_id="close-config-modal")
+        # return ConfigPanel(
+        #     config_open=config_open,
+        #     config_modal=config_modal,
+        #     dropdown_sources=dropdown_sources,
+        #     dropdown_reports=dropdown_reports,
+        #     dropdown_external_sources=dropdown_external_sources,
+        #     search_only=search_only,
+        #     dropdown_audience=dropdown_audience,
+        #     after=after,
+        #     output_query=output_query,
+        #     output_language=output_language
+        # )
+        return {
+            "config_open" : config_open,
+            "config_modal": config_modal,
+            "dropdown_sources": dropdown_sources,
+            "dropdown_reports": dropdown_reports,
+            "dropdown_external_sources": dropdown_external_sources,
+            "search_only": search_only,
+            "dropdown_audience": dropdown_audience,
+            "after": after,
+            "output_query": output_query,
+            "output_language": output_language,
+            "close_config_modal_button": close_config_modal_button
+        }

front/tabs/tab_examples.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import gradio as gr
+from climateqa.sample_questions import QUESTIONS
+def create_examples_tab():
+    examples_hidden = gr.Textbox(visible=False, elem_id=f"examples-hidden")
+    first_key = list(QUESTIONS.keys())[0]
+    dropdown_samples = gr.Dropdown(
+        choices=QUESTIONS.keys(),
+        value=first_key,
+        interactive=True,
+        label="Select a category of sample questions",
+        elem_id="dropdown-samples"
+    )
+    samples = []
+    for i, key in enumerate(QUESTIONS.keys()):
+        examples_visible = (i == 0)
+        with gr.Row(visible=examples_visible) as group_examples:
+            examples_questions = gr.Examples(
+                examples=QUESTIONS[key],
+                inputs=[examples_hidden],
+                examples_per_page=8,
+                run_on_click=False,
+                elem_id=f"examples{i}",
+                api_name=f"examples{i}"
+            )
+        samples.append(group_examples)
+    def change_sample_questions(key):
+        index = list(QUESTIONS.keys()).index(key)
+        visible_bools = [False] * len(samples)
+        visible_bools[index] = True
+        return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
+    # event listener
+    dropdown_samples.change(change_sample_questions, dropdown_samples, samples)
+    return examples_hidden

front/tabs/tab_figures.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import gradio as gr
+from gradio_modal import Modal
+def create_figures_tab():
+    # Builds the figures tab: three state holders, a hidden modal containing
+    # a gallery, a button that opens that modal, and an HTML area for figure
+    # cards. Returns (sources_raw, new_figures, used_figures,
+    # gallery_component, figures_cards, figure_modal); the button itself is
+    # not returned.
+    # sources_raw has no initial value; the two figure lists start empty.
+    sources_raw = gr.State()
+    new_figures = gr.State([])
+    used_figures = gr.State([])
+    # The modal is created hidden and only wraps the gallery.
+    with Modal(visible=False, elem_id="modal_figure_galery") as figure_modal:
+        gallery_component = gr.Gallery(
+            object_fit='scale-down',
+            elem_id="gallery-component",
+            height="80vh"
+        )
+    show_full_size_figures = gr.Button(
+        "Show figures in full size",
+        elem_id="show-figures",
+        interactive=True
+    )
+    # Clicking the button makes the gallery modal visible (no inputs).
+    show_full_size_figures.click(
+        lambda: Modal(visible=True),
+        None,
+        figure_modal
+    )
+    figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
+    return sources_raw, new_figures, used_figures, gallery_component, figures_cards, figure_modal

front/tabs/tab_papers.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import gradio as gr
+from gradio_modal import Modal
+def create_papers_tab():
+    # Builds the papers tab: two collapsed accordions (summary and relevant
+    # papers), a button, and a hidden modal holding the citations-network
+    # HTML. Returns (papers_summary, papers_html, citations_network,
+    # papers_modal); the accordions and the button are not returned.
+    with gr.Accordion(
+        visible=True,
+        elem_id="papers-summary-popup",
+        label="See summary of relevant papers",
+        open=False
+    ) as summary_popup:
+        papers_summary = gr.Markdown("", visible=True, elem_id="papers-summary")
+    with gr.Accordion(
+        visible=True,
+        elem_id="papers-relevant-popup",
+        label="See relevant papers",
+        open=False
+    ) as relevant_popup:
+        papers_html = gr.HTML(show_label=False, elem_id="papers-textbox")
+    btn_citations_network = gr.Button("Explore papers citations network")
+    # The modal is created hidden; its HTML starts with just a heading.
+    with Modal(visible=False) as papers_modal:
+        citations_network = gr.HTML(
+            "<h3>Citations Network Graph</h3>",
+            visible=True,
+            elem_id="papers-citations-network"
+        )
+    # Clicking the button makes the citations modal visible (no inputs).
+    btn_citations_network.click(
+        lambda: Modal(visible=True),
+        None,
+        papers_modal
+    )
+    return papers_summary, papers_html, citations_network, papers_modal

front/tabs/tab_recommended_content.py ADDED Viewed

File without changes

front/utils.py CHANGED Viewed

@@ -39,23 +39,33 @@ def parse_output_llm_with_sources(output:str)->str:
     content_parts = "".join(parts)
     return content_parts
-def process_figures(docs:list)->tuple:
-    gallery=[]
-    used_figures =[]
     figures = '<div class="figures-container"><p></p> </div>'
     docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
-    for i, doc in enumerate(docs_figures):
-        if doc.metadata["chunk_type"] == "image":
-            if doc.metadata["figure_code"] != "N/A":
-                title = f"{doc.metadata['figure_code']} - {doc.metadata['short_name']}"
-            else:
-                title = f"{doc.metadata['short_name']}"
-            if title not in used_figures:
-                used_figures.append(title)
                 try:
-                    key = f"Image {i+1}"
                     image_path = doc.metadata["image_path"].split("documents/")[1]
                     img = get_image_from_azure_blob_storage(image_path)
@@ -68,12 +78,12 @@ def process_figures(docs:list)->tuple:
                     img_str = base64.b64encode(buffered.getvalue()).decode()
-                    figures = figures + make_html_figure_sources(doc, i, img_str)
                     gallery.append(img)
                 except Exception as e:
-                    print(f"Skipped adding image {i} because of {e}")
-    return figures, gallery
 def generate_html_graphs(graphs:list)->str:

     content_parts = "".join(parts)
     return content_parts
+def process_figures(docs:list, new_figures:list)->tuple:
+    if new_figures == []:
+        return docs, "", []
+    docs = docs + new_figures
     figures = '<div class="figures-container"><p></p> </div>'
+    gallery = []
+    used_figures = []
+    if docs == []:
+        return docs, figures, gallery
     docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
+    for i_doc, doc in enumerate(docs_figures):
+        if doc.metadata["chunk_type"] == "image":
+            path = doc.metadata["image_path"]
+            if path not in used_figures:
+                used_figures.append(path)
+                figure_number = len(used_figures)
                 try:
+                    key = f"Image {figure_number}"
                     image_path = doc.metadata["image_path"].split("documents/")[1]
                     img = get_image_from_azure_blob_storage(image_path)
                     img_str = base64.b64encode(buffered.getvalue()).decode()
+                    figures = figures + make_html_figure_sources(doc, figure_number, img_str)
                     gallery.append(img)
                 except Exception as e:
+                    print(f"Skipped adding image {figure_number} because of {e}")
+    return docs, figures, gallery
 def generate_html_graphs(graphs:list)->str:

requirements.txt CHANGED Viewed

@@ -4,7 +4,7 @@ azure-storage-blob
 python-dotenv==1.0.0
 langchain==0.2.1
 langchain_openai==0.1.7
-langgraph==0.0.55
 pinecone-client==4.1.0
 sentence-transformers==2.6.0
 huggingface-hub

 python-dotenv==1.0.0
 langchain==0.2.1
 langchain_openai==0.1.7
+langgraph==0.2.70
 pinecone-client==4.1.0
 sentence-transformers==2.6.0
 huggingface-hub

sandbox/20241104 - CQA - StepByStep CQA.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

style.css CHANGED Viewed

@@ -1,89 +1,127 @@
 /* :root {
     --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
-  } */
-#tab-recommended_content{
-    padding-top: 0px;
-    padding-left : 0px;
-    padding-right: 0px;
 }
 #group-subtabs {
     /* display: block; */
-    width: 100%; /* Ensures the parent uses the full width */
     position : sticky;
 }
-#group-subtabs .tab-container {
-    display: flex;
-    text-align: center;
-    width: 100%; /* Ensures the tabs span the full width */
-}
-#group-subtabs .tab-container button {
-    flex: 1; /* Makes each button take equal width */
 }
-#papers-summary-popup button span{
-    /* make label of accordio in bold, center, and bigger */
-    font-size: 16px;
     font-weight: bold;
-    text-align: center;
 }
-#papers-relevant-popup span{
-    /* make label of accordio in bold, center, and bigger */
-    font-size: 16px;
-    font-weight: bold;
-    text-align: center;
 }
-#tab-citations .button{
-    padding: 12px 16px;
-    font-size: 16px;
     font-weight: bold;
-    cursor: pointer;
-    border: none;
-    outline: none;
     text-align: left;
-    transition: background-color 0.3s ease;
 }
-.gradio-container {
-    width: 100%!important;
-    max-width: 100% !important;
 }
-/* fix for huggingface infinite growth*/
-main.flex.flex-1.flex-col {
-    max-height: 95vh !important;
 }
-button#show-figures{
-    /* Base styles */
-    background-color: #f5f5f5;
-    border: 1px solid #e0e0e0;
-    border-radius: 4px;
-    color: #333333;
-    cursor: pointer;
-    width: 100%;
-    text-align: center;
 }
-.avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
-    width: 100%;
-    height: 100%;
-    object-fit: cover;
-    border-radius: 50%;
-    padding: 0px;
-    margin: 0px;
 }
 .warning-box {
     background-color: #fff3cd;
     border: 1px solid #ffeeba;
@@ -93,32 +131,20 @@ button#show-figures{
     color: #856404;
     display: inline-block;
     margin-bottom: 15px;
-  }
 .tip-box {
     background-color: #f0f9ff;
     border: 1px solid #80d4fa;
     border-radius: 4px;
-    margin-top:20px;
     padding: 15px 20px;
     font-size: 14px;
     display: inline-block;
-    margin-bottom: 15px;
     width: auto;
-    color:black !important;
-}
-body.dark .warning-box * {
-    color:black !important;
-}
-body.dark .tip-box * {
-    color:black !important;
 }
 .tip-box-title {
     font-weight: bold;
     font-size: 14px;
@@ -130,116 +156,128 @@ body.dark .tip-box * {
     margin-right: 5px;
 }
-.gr-box {border-color: #d6c37c}
-#hidden-message{
-    display:none;
 }
-.message{
-    font-size:14px !important;
-}
-.card-content img {
-    display: block;
-    margin: auto;
-    max-width: 100%; /* Ensures the image is responsive */
-    height: auto;
 }
-a {
-    text-decoration: none;
-    color: inherit;
 }
-.doc-ref sup{
-    color:#dc2626!important;
-    /* margin-right:1px; */
 }
-.card {
-    background-color: white;
-    border-radius: 10px;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-    overflow: hidden;
-    display: flex;
-    flex-direction: column;
-    margin:20px;
 }
-.card-content {
-    padding: 20px;
 }
-.card-content h2 {
-    font-size: 14px !important;
-    font-weight: bold;
-    margin-bottom: 10px;
-    margin-top:0px !important;
-    color:#dc2626!important;;
 }
-.card-content p {
-    font-size: 12px;
-    margin-bottom: 0;
 }
-.card-footer {
-    background-color: #f4f4f4;
-    font-size: 10px;
     padding: 10px;
     display: flex;
-    justify-content: space-between;
     align-items: center;
 }
-.card-footer span {
-    flex-grow: 1;
-    text-align: left;
-    color: #999 !important;
 }
-.pdf-link {
-    display: inline-flex;
-    align-items: center;
-    margin-left: auto;
-    text-decoration: none!important;
-    font-size: 14px;
 }
-.message.user{
-    /* background-color:#7494b0 !important; */
-    border:none;
-    /* color:white!important; */
 }
-.message.bot{
-    /* background-color:#f2f2f7 !important; */
-    border:none;
 }
-label.selected{
-  background: #93c5fd !important;
 }
-#submit-button{
-    padding:0px !important;
 }
-#modal-config .block.modal-block.padded {
-    padding-top: 25px;
-    height: 100vh;
-}
-#modal-config .modal-container{
-    margin: 0px;
-    padding: 0px;
 }
-/* Modal styles */
 #modal-config {
     position: fixed;
     top: 0;
@@ -252,28 +290,23 @@ label.selected{
     padding: 15px;
     transform: none;
 }
-#modal-config .close{
-    display: none;
 }
-/* Push main content to the right when modal is open */
-/* .modal ~ * {
-    margin-left: 300px;
-    transition: margin-left 0.3s ease;
-} */
-#modal-config .modal .wrap ul{
-    position:static;
-    top: 100%;
-    left: 0;
-    /* min-height: 100px; */
-    height: 100%;
-    /* margin-top: 0; */
-    z-index: 9999;
-    pointer-events: auto;
-    height: 200px;
 }
-#config-button{
     background: none;
     border: none;
     padding: 8px;
@@ -296,155 +329,231 @@ label.selected{
     background-color: rgba(0, 0, 0, 0.1);
 }
-#checkbox-config{
-    display: block;
-    position: absolute;
-    background: none;
     border: none;
-    padding: 8px;
     cursor: pointer;
-    width: 40px;
-    height: 40px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    border-radius: 50%;
-    transition: background-color 0.2s;
-    font-size: 20px;
     text-align: center;
 }
-#checkbox-config:checked{
-    display: block;
 }
-@media screen and (min-width: 1024px) {
-    /* Additional style for scrollable tab content */
-    /* div#tab-recommended_content {
-        overflow-y: auto;
-        max-height: 80vh;
-    } */
-    .gradio-container {
-        max-height: calc(100vh - 190px) !important;
-        overflow: hidden;
-    }
-    /* div#chatbot{
-        height:calc(100vh - 170px) !important;
-        max-height:calc(100vh - 170px) !important;
-    } */
-    div#tab-examples{
-        height:calc(100vh - 190px) !important;
-        overflow-y: scroll !important;
-        /* overflow-y: auto; */
-    }
-    div#sources-textbox{
-        height:calc(100vh - 190px) !important;
-        overflow-y: scroll !important;
-        /* overflow-y: auto !important; */
-    }
-    div#graphs-container{
-        height:calc(100vh - 210px) !important;
-        overflow-y: scroll !important;
-    }
-    div#sources-figures{
-        height:calc(100vh - 300px) !important;
-        max-height: 90vh !important;
-        overflow-y: scroll !important;
-    }
-    div#graphs-container{
-        height:calc(100vh - 300px) !important;
-        max-height: 90vh !important;
-        overflow-y: scroll !important;
     }
-    div#tab-citations{
-        height:calc(100vh - 300px) !important;
-        max-height: 90vh !important;
         overflow-y: scroll !important;
     }
-    div#tab-config{
-        height:calc(100vh - 190px) !important;
         overflow-y: scroll !important;
-        /* overflow-y: auto !important; */
     }
-    /* Force container to respect height limits */
-    .main-component{
-        contain: size layout;
-        overflow: hidden;
     }
-    div#chatbot-row{
-        max-height:calc(100vh - 90px) !important;
     }
-/*
-    .max-height{
-        height:calc(100vh - 90px) !important;
-        max-height:calc(100vh - 90px) !important;
         overflow-y: auto;
     }
-*/
 }
-footer {
-    visibility: hidden;
-    display:none !important;
-}
 @media screen and (max-width: 767px) {
-    /* Your mobile-specific styles go here */
-    div#chatbot{
-        height:500px !important;
     }
-    #submit-button{
-        padding:0px !important;
         min-width: 80px;
     }
-    /* This will hide all list items */
     div.tab-nav button {
         display: none !important;
     }
-    /* This will show only the first list item */
-    div.tab-nav button:first-child {
-        display: block !important;
-    }
-    /* This will show only the first list item */
     div.tab-nav button:nth-child(2) {
         display: block !important;
     }
-    #right-panel button{
         display: block !important;
     }
-    /* ... add other mobile-specific styles ... */
 }
 @media (prefers-color-scheme: dark) {
-    .card{
         background-color: #374151;
     }
-    .card-image > .card-content{
         background-color: rgb(55, 65, 81) !important;
     }
@@ -452,251 +561,48 @@ footer {
         background-color: #404652;
     }
-    .container > .wrap{
         background-color: #374151 !important;
-        color:white !important;
     }
-    .card-content h2{
-        color:#e7754f !important;
-    }
-    .doc-ref sup{
-        color:rgb(235 109 35)!important;
-        /* margin-right:1px; */
     }
     .card-footer span {
-        color:white !important;
     }
-}
-.doc-ref{
-    color:#dc2626!important;
-    margin-right:1px;
-}
-.tabitem{
-    border:none !important;
-}
-.other-tabs > div{
-    padding-left:40px;
-    padding-right:40px;
-    padding-top:10px;
-}
-.gallery-item > div{
-    white-space: normal !important; /* Allow the text to wrap */
-    word-break: break-word !important; /* Break words to prevent overflow */
-    overflow-wrap: break-word !important; /* Break long words if necessary */
-  }
-span.chatbot > p > img{
-    margin-top:40px !important;
-    max-height: none !important;
-    max-width: 80% !important;
-    border-radius:0px !important;
-}
-.chatbot-caption{
-    font-size:11px;
-    font-style:italic;
-    color:#508094;
-}
-.ai-generated{
-    font-size:11px!important;
-    font-style:italic;
-    color:#73b8d4 !important;
-}
-.card-image > .card-content{
-    background-color:#f1f7fa;
-}
-.tab-nav > button.selected{
-    color:#4b8ec3;
-    font-weight:bold;
-    border:none;
-}
-.tab-nav{
-    border:none !important;
-}
-#input-textbox > label > textarea{
-    border-radius:40px;
-    padding-left:30px;
-    resize:none;
-}
-#input-message > div{
-    border:none;
-}
-#dropdown-samples{
-  background:none !important;
-}
-#dropdown-samples > .container > .wrap{
-  background-color:white;
-}
-#tab-examples > div > .form{
-  border:none;
-  background:none !important;
-}
-.a-doc-ref{
-	text-decoration: none !important;
 }
-.dropdown {
-    position: relative;
-    display:inline-block;
-    margin-bottom: 10px;
-  }
-  .dropdown-toggle {
-    background-color: #f2f2f2;
-    color: black;
-    padding: 10px;
-    font-size: 16px;
-    cursor: pointer;
-    display: block;
-    width: 400px; /* Adjust width as needed */
-    position: relative;
-    display: flex;
-    align-items: center; /* Vertically center the contents */
-    justify-content: left;
-  }
-  .dropdown-toggle .caret {
-    content: "";
-    position: absolute;
-    right: 10px;
-    top: 50%;
-    border-left: 5px solid transparent;
-    border-right: 5px solid transparent;
-    border-top: 5px solid black;
-    transform: translateY(-50%);
-  }
-  input[type="checkbox"] {
-    display: none !important;
-  }
-  input[type="checkbox"]:checked + .dropdown-content {
     display: block;
-  }
-  #checkbox-chat input[type="checkbox"] {
-    display: flex !important;
-  }
-  .dropdown-content {
-    display: none;
     position: absolute;
-    background-color: #f9f9f9;
-    min-width: 300px;
-    box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
-    z-index: 1;
-    padding: 12px;
-    border: 1px solid #ccc;
-  }
-  input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
-    display: block;
-  }
-  input[type="checkbox"]:checked + .dropdown-toggle .caret {
-    border-top: 0;
-    border-bottom: 5px solid black;
-  }
-.loader {
-    border: 1px solid #d0d0d0 !important; /* Light grey background */
-    border-top: 1px solid #db3434 !important; /* Blue color */
-    border-right: 1px solid #3498db !important; /* Blue color */
     border-radius: 50%;
-    width: 20px;
-    height: 20px;
-    animation: spin 2s linear infinite;
-    display:inline-block;
-    margin-right:10px !important;
-}
-.checkmark{
-    color:green !important;
-    font-size:18px;
-    margin-right:10px !important;
-}
-@keyframes spin {
-    0% { transform: rotate(0deg); }
-    100% { transform: rotate(360deg); }
-}
-.relevancy-score{
-    margin-top:10px !important;
-    font-size:10px !important;
-    font-style:italic;
-}
-.score-green{
-    color:green !important;
-}
-.score-orange{
-    color:orange !important;
-}
-.score-red{
-    color:red !important;
-}
-/* Mobile specific adjustments */
-@media screen and (max-width: 767px) {
-    div#tab-recommended_content {
-        max-height: 50vh; /* Reduce height for smaller screens */
-        overflow-y: auto;
-    }
-}
-/* Additional style for scrollable tab content */
-div#tab-saved-graphs {
-    overflow-y: auto; /* Enable vertical scrolling */
-    max-height: 80vh; /* Adjust height as needed */
-}
-/* Mobile specific adjustments */
-@media screen and (max-width: 767px) {
-    div#tab-saved-graphs {
-        max-height: 50vh; /* Reduce height for smaller screens */
-        overflow-y: auto;
-    }
-}
-.message-buttons-left.panel.message-buttons.with-avatar {
-    display: none;
-}
-/* Specific fixes for Hugging Face Space iframe */
-.h-full {
-    height: auto !important;
-    min-height: 0 !important;
 }
-.space-content {
-    height: auto !important;
-    max-height: 100vh !important;
-    overflow: hidden;
 }

+/* Root Variables */
 /* :root {
     --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
+} */
+/* Layout & Container Styles */
+.gradio-container {
+    width: 100% !important;
+    max-width: 100% !important;
+}
+main.flex.flex-1.flex-col {
+    max-height: 95vh !important;
+}
+.main-component {
+    contain: size layout;
+    overflow: hidden;
+}
+/* Tab Styles */
+#tab-recommended_content {
+    padding: 0;
 }
 #group-subtabs {
     /* display: block; */
     position : sticky;
 }
 }
+.tab-nav {
+    border: none !important;
+}
+.tab-nav > button.selected {
+    color: #4b8ec3;
     font-weight: bold;
+    border: none;
+}
+.tabitem {
+    border: none !important;
 }
+.other-tabs > div {
+    padding: 40px 40px 10px;
 }
+/* Card Styles */
+.card {
+    background-color: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    overflow: hidden;
+    display: flex;
+    flex-direction: column;
+    margin: 20px;
+}
+.card-content {
+    padding: 20px;
+}
+.card-content h2 {
+    font-size: 14px !important;
     font-weight: bold;
+    margin: 0 0 10px !important;
+    color: #dc2626 !important;
+}
+.card-content p {
+    font-size: 12px;
+    margin-bottom: 0;
+}
+.card-content img {
+    display: block;
+    margin: auto;
+    max-width: 100%;
+    height: auto;
+}
+.card-footer {
+    background-color: #f4f4f4;
+    font-size: 10px;
+    padding: 10px;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+}
+.card-footer span {
+    flex-grow: 1;
     text-align: left;
+    color: #999 !important;
 }
+.card-image > .card-content {
+    background-color: #f1f7fa;
+}
+/* Message & Chat Styles */
+.message {
+    font-size: 14px !important;
 }
+.message.user, .message.bot {
+    border: none;
 }
+#input-textbox > label > textarea {
+    border-radius: 40px;
+    padding-left: 30px;
+    resize: none;
 }
+#input-message > div {
+    border: none;
 }
+/* Alert Boxes */
 .warning-box {
     background-color: #fff3cd;
     border: 1px solid #ffeeba;
     color: #856404;
     display: inline-block;
     margin-bottom: 15px;
+}
 .tip-box {
     background-color: #f0f9ff;
     border: 1px solid #80d4fa;
     border-radius: 4px;
+    margin: 20px 0 15px;
     padding: 15px 20px;
     font-size: 14px;
     display: inline-block;
     width: auto;
+    color: black !important;
 }
 .tip-box-title {
     font-weight: bold;
     font-size: 14px;
     margin-right: 5px;
 }
+/* Loader Animation */
+.loader {
+    border: 1px solid #d0d0d0 !important;
+    border-top: 1px solid #db3434 !important;
+    border-right: 1px solid #3498db !important;
+    border-radius: 50%;
+    width: 20px;
+    height: 20px;
+    animation: spin 2s linear infinite;
+    display: inline-block;
+    margin-right: 10px !important;
 }
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
 }
+/* PDF Link Styles */
+.pdf-link {
+    display: inline-flex;
+    align-items: center;
+    margin-left: auto;
+    text-decoration: none!important;
+    font-size: 14px;
 }
+/* Document Reference Styles */
+.doc-ref sup {
+    color: #dc2626!important;
 }
+.doc-ref {
+    color: #dc2626!important;
+    margin-right: 1px;
+}
+/* Chatbot & Image Styles */
+span.chatbot > p > img {
+    margin-top: 40px !important;
+    max-height: none !important;
+    max-width: 80% !important;
+    border-radius: 0px !important;
 }
+.chatbot-caption {
+    font-size: 11px;
+    font-style: italic;
+    color: #508094;
 }
+.ai-generated {
+    font-size: 11px!important;
+    font-style: italic;
+    color: #73b8d4 !important;
 }
+/* Dropdown Styles */
+.dropdown {
+    position: relative;
+    display: inline-block;
+    margin-bottom: 10px;
 }
+.dropdown-toggle {
+    background-color: #f2f2f2;
+    color: black;
     padding: 10px;
+    font-size: 16px;
+    cursor: pointer;
     display: flex;
+    width: 400px;
     align-items: center;
+    justify-content: left;
+    position: relative;
 }
+.dropdown-toggle .caret {
+    content: "";
+    position: absolute;
+    right: 10px;
+    top: 50%;
+    border-left: 5px solid transparent;
+    border-right: 5px solid transparent;
+    border-top: 5px solid black;
+    transform: translateY(-50%);
 }
+.dropdown-content {
+    display: none;
+    position: absolute;
+    background-color: #f9f9f9;
+    min-width: 300px;
+    box-shadow: 0 8px 16px 0 rgba(0,0,0,0.2);
+    z-index: 1;
+    padding: 12px;
+    border: 1px solid #ccc;
 }
+/* Checkbox Styles */
+input[type="checkbox"] {
+    display: none !important;
 }
+#checkbox-chat input[type="checkbox"] {
+    display: flex !important;
 }
+input[type="checkbox"]:checked + .dropdown-content {
+    display: block;
 }
+input[type="checkbox"]:checked + .dropdown-toggle + .dropdown-content {
+    display: block;
 }
+input[type="checkbox"]:checked + .dropdown-toggle .caret {
+    border-top: 0;
+    border-bottom: 5px solid black;
 }
+/* Modal Styles */
 #modal-config {
     position: fixed;
     top: 0;
     padding: 15px;
     transform: none;
 }
+#modal-config .block.modal-block.padded {
+    padding-top: 25px;
+    height: 100vh;
 }
+#modal-config .modal-container {
+    margin: 0px;
+    padding: 0px;
+}
+#modal-config .close {
+    display: none;
 }
+/* Config Button Styles */
+#config-button {
     background: none;
     border: none;
     padding: 8px;
     background-color: rgba(0, 0, 0, 0.1);
 }
+/* Relevancy Score Styles */
+.relevancy-score {
+    margin-top: 10px !important;
+    font-size: 10px !important;
+    font-style: italic;
+}
+.score-green {
+    color: green !important;
+}
+.score-orange {
+    color: orange !important;
+}
+.score-red {
+    color: red !important;
+}
+/* Gallery Styles */
+.gallery-item > div {
+    white-space: normal !important;
+    word-break: break-word !important;
+    overflow-wrap: break-word !important;
+}
+/* Avatar Styles */
+.avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
+    width: 100%;
+    height: 100%;
+    object-fit: cover;
+    border-radius: 50%;
+    padding: 0px;
+    margin: 0px;
+}
+/* Message Button Styles */
+.message-buttons-left.panel.message-buttons.with-avatar {
+    display: none;
+}
+/* Checkmark Styles */
+.checkmark {
+    color: green !important;
+    font-size: 18px;
+    margin-right: 10px !important;
+}
+/* Papers Summary & Relevant Popup Styles */
+#papers-summary-popup button span,
+#papers-relevant-popup span {
+    font-size: 16px;
+    font-weight: bold;
+    text-align: center;
+}
+/* Citations Tab Button Style */
+#tab-citations .button {
+    padding: 12px 16px;
+    font-size: 16px;
+    font-weight: bold;
+    cursor: pointer;
     border: none;
+    outline: none;
+    text-align: left;
+    transition: background-color 0.3s ease;
+}
+/* Show Figures Button Style */
+button#show-figures {
+    background-color: #f5f5f5;
+    border: 1px solid #e0e0e0;
+    border-radius: 4px;
+    color: #333333;
     cursor: pointer;
+    width: 100%;
     text-align: center;
 }
+/* Gradio Box Style */
+.gr-box {
+    border-color: #d6c37c;
 }
+/* Hidden Message Style */
+#hidden-message {
+    display: none;
+}
+/* Label Selected Style */
+label.selected {
+    background: #93c5fd !important;
+}
+/* Submit Button Style */
+#submit-button {
+    padding: 0px !important;
+}
+/* Hugging Face Space Fixes */
+.h-full {
+    height: auto !important;
+    min-height: 0 !important;
+}
+.space-content {
+    height: auto !important;
+    max-height: 100vh !important;
+    overflow: hidden;
+}
+/* Dropdown Samples Style */
+#dropdown-samples {
+    background: none !important;
+}
+#dropdown-samples > .container > .wrap {
+    background-color: white;
+}
+/* Tab Examples Form Style */
+#tab-examples > div > .form {
+    border: none;
+    background: none !important;
+}
+/* Utility Classes */
+.hidden {
+    display: none !important;
+}
+footer {
+    display: none !important;
+    visibility: hidden;
+}
+a {
+    text-decoration: none;
+    color: inherit;
+}
+.a-doc-ref {
+    text-decoration: none !important;
+}
+/* Media Queries */
+/* Desktop Media Query */
+@media screen and (min-width: 1024px) {
+    .gradio-container {
+        max-height: calc(100vh - 190px) !important;
+        overflow: hidden;
     }
+    div#tab-examples,
+    div#sources-textbox,
+    div#tab-config {
+        height: calc(100vh - 190px) !important;
         overflow-y: scroll !important;
     }
+    div#sources-figures,
+    div#graphs-container,
+    div#tab-citations {
+        height: calc(100vh - 300px) !important;
+        max-height: 90vh !important;
         overflow-y: scroll !important;
     }
+    div#chatbot-row {
+        max-height: calc(100vh - 90px) !important;
     }
+    div#graphs-container {
+        height: calc(100vh - 210px) !important;
+        overflow-y: scroll !important;
     }
+    div#tab-saved-graphs {
         overflow-y: auto;
+        max-height: 80vh;
     }
 }
+/* Mobile Media Query */
 @media screen and (max-width: 767px) {
+    div#chatbot {
+        height: 500px !important;
     }
+    #submit-button {
+        padding: 0 !important;
         min-width: 80px;
     }
     div.tab-nav button {
         display: none !important;
     }
+    div.tab-nav button:first-child,
     div.tab-nav button:nth-child(2) {
         display: block !important;
     }
+    #right-panel button {
         display: block !important;
     }
+    div#tab-recommended_content {
+        max-height: 50vh;
+        overflow-y: auto;
+    }
+    div#tab-saved-graphs {
+        max-height: 50vh;
+        overflow-y: auto;
+    }
 }
+/* Dark Mode */
 @media (prefers-color-scheme: dark) {
+    .card {
         background-color: #374151;
     }
+    .card-image > .card-content {
         background-color: rgb(55, 65, 81) !important;
     }
         background-color: #404652;
     }
+    .container > .wrap {
         background-color: #374151 !important;
+        color: white !important;
     }
+    .card-content h2 {
+        color: #e7754f !important;
     }
     .card-footer span {
+        color: white !important;
     }
+    body.dark .warning-box *,
+    body.dark .tip-box * {
+        color: black !important;
+    }
+    .doc-ref sup {
+        color: rgb(235 109 35)!important;
+    }
 }
+/* Checkbox Config Style */
+#checkbox-config {
     display: block;
     position: absolute;
+    background: none;
+    border: none;
+    padding: 8px;
+    cursor: pointer;
+    width: 40px;
+    height: 40px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
     border-radius: 50%;
+    transition: background-color 0.2s;
+    font-size: 20px;
+    text-align: center;
 }
+#checkbox-config:checked {
+    display: block;
 }