import gradio as gr
from gradio import ChatMessage
import time

def simulate_thinking_chat(message: str, history: list):
    history.append(
        ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "Thinking... ", "log": "Starting analysis"}
        )
    )
    time.sleep(0.5)
    yield history

    thoughts = [
        "First, I need to understand the core aspects of the query...",
        "Now, considering the broader context and implications...",
        "Analyzing potential approaches to formulate a comprehensive answer...",
        "Finally, structuring the response for clarity and completeness..."
    ]

    accumulated_thoughts = ""

    for i, thought in enumerate(thoughts):
        time.sleep(0.5)

        accumulated_thoughts += f"- {thought}\n\n"

        history[-1] = ChatMessage(
            role="assistant",
            content=accumulated_thoughts.strip(),
            metadata={
                "title": "Thinking...",
                "log": f"Step {i+1} completed.",
                "duration": 0.5 * (i + 1)
            }
        )
        yield history

    history.append(
        ChatMessage(
            role="assistant",
            content="Based on my thoughts and analysis above, my response is: This dummy repro shows how thoughts of a thinking LLM can be progressively shown before providing its final answer."
        )
    )
    yield history

with gr.Blocks() as demo:
    gr.Markdown("# Thinking LLM Demo 🤔")
    chatbot = gr.Chatbot(type="messages", render_markdown=True)
    msg = gr.Textbox(placeholder="Type your message...")

    msg.submit(
        lambda m, h: (m, h + [ChatMessage(role="user", content=m)]),
        [msg, chatbot],
        [msg, chatbot]
    ).then(
        simulate_thinking_chat,
        [msg, chatbot],
        chatbot
    )

if __name__ == "__main__":
    demo.launch()