import html
from datetime import datetime, timedelta, timezone

import gradio as gr
import spaces  # Required for ZeroGPU
from duckduckgo_search import DDGS
from transformers import pipeline, AutoTokenizer

# Text-generation pipeline, built once at import time; device=-1 selects the CPU.
generator = pipeline("text-generation", model="distilgpt2", device=-1)
# Tokenizer loaded from the same checkpoint as the generator so that token ids
# and token counts match what the model actually consumes (a tokenizer from a
# different model family has an unrelated vocabulary).
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

# Web search function (CPU-based)
def get_web_results(query: str, max_results: int = 5) -> list:
    """Search DuckDuckGo for *query* and return normalized result records.

    Args:
        query: Search text passed to ``DDGS.text``.
        max_results: Upper bound on the number of results requested.

    Returns:
        A list of dicts, each with the keys ``"title"``, ``"snippet"`` and
        ``"url"``. Results whose URL mentions one of the preferred domains
        (or contains ``"edu"``) are returned when any exist; otherwise all
        results are returned, in the same normalized shape. If the search
        fails, a single record with title ``"Error"`` and url ``"#"`` is
        returned instead of raising.
    """
    preferred_domains = (
        "geeksforgeeks.org",
        "realpython.com",
        "coursera.org",
        "udemy.com",
        "stackexchange.com",
    )
    try:
        with DDGS() as ddgs:
            raw_results = list(ddgs.text(query, max_results=max_results))

        # Normalize every record up front so the fallback below has the same
        # schema as the filtered list; downstream code indexes "snippet"/"url".
        normalized = [
            {
                "title": r.get("title") or "No Title",
                "snippet": r.get("body") or "",
                "url": r.get("href") or "#",
            }
            for r in raw_results
        ]

        # NOTE: the "edu" test is a loose substring match, so it also accepts
        # URLs that merely contain those letters.
        preferred = [
            r
            for r in normalized
            if any(domain in r["url"] for domain in preferred_domains)
            or "edu" in r["url"]
        ]
        return preferred or normalized
    except Exception as e:
        # Best-effort boundary: surface the failure as a displayable record.
        return [{"title": "Error", "snippet": f"Failed to fetch results: {str(e)}", "url": "#"}]

# Format prompt for the AI model to generate high-quality, structured answers
def format_prompt(query: str, web_results: list) -> str:
    """Build the generation prompt from the query and the fetched web results.

    Args:
        query: The user's question, inserted verbatim on the ``Query:`` line.
        web_results: Result records; each contributes one ``- title: snippet``
            line. Records that still use the raw search keys (``body``) are
            accepted as well.

    Returns:
        A prompt consisting of a timestamp line, the query, the web context
        lines and a fixed instruction paragraph.
    """
    # The label says IST, so compute the time in IST explicitly instead of
    # relying on the host's local timezone. IST is a fixed +05:30 offset.
    ist = timezone(timedelta(hours=5, minutes=30))
    current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S IST")
    context = "\n".join(
        f"- {r.get('title', 'No Title')}: {r.get('snippet', r.get('body', ''))}"
        for r in web_results
    )
    return f"""Current Time: {current_time}
Query: {query}
Web Context (High-Quality Sources):
{context}
Provide a detailed, step-by-step answer in markdown format with clear headings (e.g., #, ##), bullet points, and citations [1], [2], etc. Ensure the answer is structured, relevant, and visually appealing, addressing the user's intent comprehensively. If the query is informational (e.g., 'what,' 'how,' 'why'), offer in-depth insights, examples, and practical advice. If no high-quality answer is possible, state, 'I couldn’t find sufficient high-quality information to provide a detailed answer, but here’s what I found:' followed by a summary of web results."""

# Answer generation, scheduled through the ZeroGPU decorator
@spaces.GPU(duration=180)  # Increased duration for more detailed generation
def generate_answer(prompt: str) -> str:
    """Generate an answer for *prompt* with the module-level ``generator``.

    Args:
        prompt: Full prompt text, including the web-context section and the
            trailing instruction paragraph.

    Returns:
        The newly generated text with surrounding whitespace removed. If the
        model produces no text, a fixed fallback sentence followed by the web
        context lines found in *prompt* is returned instead.

    NOTE(review): ``generator`` is created with ``device=-1`` at module level,
    so the pipeline itself is configured for CPU even though this function is
    wrapped in ``@spaces.GPU`` - confirm whether it should be placed on CUDA.
    """
    outputs = generator(
        prompt,
        max_new_tokens=400,  # Increased for more detailed output
        num_return_sequences=1,
        truncation=True,
        do_sample=True,
        temperature=0.7,  # Controlled randomness for coherent, detailed output
        top_p=0.9,  # Focus on top probabilities for quality
        top_k=50,  # Limit to top 50 tokens for better coherence
        # Ask the pipeline for the continuation only, rather than locating the
        # end of the prompt inside the full text by string search.
        return_full_text=False,
        # Let the pipeline trim the left side of an over-long prompt so that
        # prompt + max_new_tokens fits within the model's context window.
        handle_long_generation="hole",
    )
    answer = outputs[0]["generated_text"].strip()
    if answer:
        return answer

    # Nothing was generated: fall back to the web context already embedded in
    # the prompt (the section between its header line and the instructions).
    _, _, after_header = prompt.partition("Web Context (High-Quality Sources):\n")
    context, _, _ = after_header.partition("\nProvide a detailed")
    fallback = "I couldn’t find sufficient high-quality information to provide a detailed answer, but here’s what I found:"
    return (fallback + "\n\n" + context).strip()

# Format sources for display with enhanced styling
def format_sources(web_results: list) -> str:
    """Render the web results as an HTML list of numbered source cards.

    Args:
        web_results: Result records with ``title``, ``snippet`` and ``url``
            keys. Records that still use the raw search keys (``body``,
            ``href``) are accepted as well.

    Returns:
        An HTML string: a ``no-sources`` div when *web_results* is empty,
        otherwise a ``sources-list`` div containing one ``source-item`` per
        record, numbered from 1. Snippets are cut to 150 characters, with
        ``...`` appended only when something was cut.
    """
    if not web_results:
        return "<div class='no-sources'>No sources available</div>"

    items = []
    for i, res in enumerate(web_results, 1):
        # Search results are untrusted text: escape everything placed in the
        # markup, and only link to http(s) targets.
        url = str(res.get("url", res.get("href", "#")))
        if not url.lower().startswith(("http://", "https://")):
            url = "#"
        title = html.escape(str(res.get("title", "No Title")))
        snippet = str(res.get("snippet", res.get("body", "")))
        shown = snippet[:150] + ("..." if len(snippet) > 150 else "")
        items.append(f"""
        <div class='source-item'>
            <span class='source-number'>[{i}]</span>
            <a href='{html.escape(url, quote=True)}' target='_blank' rel='noopener noreferrer' class='source-link'>{title}</a>
            <p class='source-snippet'>{html.escape(shown)}</p>
        </div>
        """)
    return "<div class='sources-list'>" + "".join(items) + "</div>"

# Main processing function
def process_deep_research(query: str, history: list):
    """Run one research turn: search the web, generate an answer, extend history.

    Args:
        query: The user's question.
        history: Earlier turns as a list of ``{"role", "content"}`` dicts;
            a falsy value is treated as an empty history.

    Returns:
        A 3-tuple ``(answer, sources_html, new_history)``. For an empty or
        whitespace-only query no search or generation is performed: a short
        notice is returned as the answer, the sources HTML is empty and the
        history is returned without a new turn.
    """
    if not history:
        history = []

    # Skip the web search and model generation when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a research query.", "", history

    # Fetch web results (CPU)
    web_results = get_web_results(query)
    sources_html = format_sources(web_results)

    # Generate answer (GPU via @spaces.GPU)
    prompt = format_prompt(query, web_results)
    answer = generate_answer(prompt)

    # Build a new list instead of mutating the caller's history in place.
    new_history = history + [
        {"role": "user", "content": query},
        {"role": "assistant", "content": answer},
    ]

    return answer, sources_html, new_history

# Dark-theme stylesheet handed to gr.Blocks(css=...) below.
# The class selectors correspond to the elem_classes set on the layout
# components (header, search-box, results-container, answer-box, sources-list,
# history-box) and to the classes emitted by format_sources (sources-list,
# source-item, source-number, source-link, source-snippet, no-sources).
css = """
body {
    font-family: 'Arial', sans-serif;
    background: #1a1a1a;
    color: #ffffff;
}
.gradio-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}
.header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #2c3e50, #3498db);
    border-radius: 12px;
    margin-bottom: 20px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.header h1 { font-size: 2.5em; margin: 0; color: #ffffff; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); }
.header p { color: #bdc3c7; font-size: 1.1em; }
.search-box {
    background: #2c2c2c;
    padding: 15px;
    border-radius: 12px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
    margin-bottom: 20px;
}
.search-box input {
    background: #3a3a3e !important;
    color: #ffffff !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 10px;
    font-size: 1em;
}
.search-box button {
    background: #3498db !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 10px 20px;
    font-size: 1em;
    transition: background 0.3s;
}
.search-box button:hover { background: #2980b9 !important; }
.results-container {
    margin-top: 20px;
    display: flex;
    gap: 20px;
}
.answer-box {
    flex: 2;
    background: #2c2c2c;
    padding: 20px;
    border-radius: 12px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
    overflow-y: auto;
    max-height: 600px;
}
.answer-box .markdown {
    color: #ecf0f1;
    line-height: 1.6;
}
.answer-box .markdown h1 {
    color: #ffffff;
    border-bottom: 2px solid #3498db;
    padding-bottom: 10px;
}
.answer-box .markdown h2 {
    color: #a8b5c3;
    margin-top: 20px;
}
.answer-box .markdown ul {
    list-style-type: none;
    padding-left: 20px;
}
.answer-box .markdown ul li::before {
    content: "•";
    color: #3498db;
    display: inline-block;
    width: 1em;
    margin-left: -1em;
}
.answer-box .markdown a { color: #60a5fa; text-decoration: underline; }
.sources-list {
    flex: 1;
    background: #2c2c2c;
    padding: 15px;
    border-radius: 12px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
    max-height: 600px;
    overflow-y: auto;
}
.no-sources { color: #a8a9ab; font-style: italic; }
.source-item {
    margin-bottom: 15px;
    padding: 10px;
    background: #3a3a3e;
    border-radius: 8px;
    transition: background 0.2s;
}
.source-item:hover { background: #4a4b4e; }
.source-number { color: #3498db; font-weight: bold; margin-right: 10px; }
.source-link { color: #60a5fa; font-weight: 500; display: block; margin-bottom: 5px; }
.source-snippet { color: #e5e7eb; font-size: 0.9em; line-height: 1.4; }
.history-box {
    margin-top: 20px;
    background: #2c2c2c;
    padding: 15px;
    border-radius: 12px;
    max-height: 300px;
    overflow-y: auto;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
"""

# Build the Gradio Blocks UI: header, query row, answer/sources row, history row
with gr.Blocks(title="Deep Research Engine - ZeroGPU", css=css) as demo:
    # Per-session conversation history in messages (role/content) format
    history_state = gr.State([])

    # Title banner
    with gr.Column(elem_classes="header"):
        gr.Markdown("# Deep Research Engine")
        gr.Markdown("Your gateway to in-depth, high-quality research for any query with real-time web insights.")

    # Query textbox next to the trigger button
    with gr.Row(elem_classes="search-box"):
        search_input = gr.Textbox(
            label="",
            placeholder="Ask anything (e.g., 'What are the latest AI trends in 2025?')",
            lines=2,
        )
        search_btn = gr.Button("Research", variant="primary")

    # Generated answer on the left, source cards on the right
    with gr.Row(elem_classes="results-container"):
        with gr.Column():
            answer_output = gr.Markdown(label="Research Findings", elem_classes="answer-box")
        with gr.Column():
            sources_output = gr.HTML(label="Sources", elem_classes="sources-list")

    # Running transcript of earlier questions and answers
    with gr.Row():
        history_display = gr.Chatbot(label="History", elem_classes="history-box", type="messages")

    def handle_search(query, history):
        """Run one research turn and return (answer, sources HTML, updated history)."""
        return process_deep_research(query, history)

    # The button click and Enter-in-textbox share the same pipeline: run the
    # search, then copy the refreshed chatbot value back into the session state.
    for trigger in (search_btn.click, search_input.submit):
        trigger(
            fn=handle_search,
            inputs=[search_input, history_state],
            outputs=[answer_output, sources_output, history_display],
        ).then(
            fn=lambda chat: chat,
            inputs=[history_display],
            outputs=[history_state],
        )

# Launch the app
if __name__ == "__main__":
    demo.launch()