import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import base64
from PIL import Image
import io

ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Function to encode image to base64
def encode_image(image_path):
    if not image_path:
        print("No image path provided")
        return None

    try:
        print(f"Encoding image from path: {image_path}")

        # If it's already a PIL Image
        if isinstance(image_path, Image.Image):
            image = image_path
        else:
            # Try to open the image file
            image = Image.open(image_path)

        # Convert to RGB if image has an alpha channel (RGBA)
        if image.mode == 'RGBA':
            image = image.convert('RGB')

        # Encode to base64
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        print("Image encoded successfully")
        return img_str
    except Exception as e:
        print(f"Error encoding image: {e}")
        return None

def respond(
    message,
    image_files,  # Changed parameter name and structure
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model
):
    print(f"Received message: {message}")
    print(f"Received {len(image_files) if image_files else 0} images")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Determine which token to use
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the provider and appropriate token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Create multimodal content if images are present
    if image_files and len(image_files) > 0:
        # Process the user message to include images
        user_content = []

        # Add text part if there is any
        if message and message.strip():
            user_content.append({
                "type": "text",
                "text": message
            })

        # Add image parts
        for img in image_files:
            if img is not None:
                # Get raw image data from path
                try:
                    encoded_image = encode_image(img)
                    if encoded_image:
                        user_content.append({
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}"
                            }
                        })
                except Exception as e:
                    print(f"Error encoding image: {e}")
    else:
        # Text-only message
        user_content = message

    # Prepare messages in the format expected by the API
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Add conversation history to the context
    for val in history:
        user_part = val[0]
        assistant_part = val[1]
        if user_part:
            # Handle both text-only and multimodal messages in history
            if isinstance(user_part, tuple) and len(user_part) == 2:
                # This is a multimodal message with text and images
                history_content = []
                if user_part[0]:  # Text
                    history_content.append({
                        "type": "text",
"text": user_part[0] }) for img in user_part[1]: # Images if img: try: encoded_img = encode_image(img) if encoded_img: history_content.append({ "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{encoded_img}" } }) except Exception as e: print(f"Error encoding history image: {e}") messages.append({"role": "user", "content": history_content}) else: # Regular text message messages.append({"role": "user", "content": user_part}) print(f"Added user message to context (type: {type(user_part)})") if assistant_part: messages.append({"role": "assistant", "content": assistant_part}) print(f"Added assistant message to context: {assistant_part}") # Append the latest user message messages.append({"role": "user", "content": user_content}) print(f"Latest user message appended (content type: {type(user_content)})") # Determine which model to use, prioritizing custom_model if provided model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model print(f"Model selected for inference: {model_to_use}") # Start with an empty string to build the response as tokens stream in response = "" print(f"Sending request to {provider} provider.") # Prepare parameters for the chat completion request parameters = { "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "frequency_penalty": frequency_penalty, } if seed is not None: parameters["seed"] = seed # Use the InferenceClient for making the request try: # Create a generator for the streaming response stream = client.chat_completion( model=model_to_use, messages=messages, stream=True, **parameters ) print("Received tokens: ", end="", flush=True) # Process the streaming response for chunk in stream: if hasattr(chunk, 'choices') and len(chunk.choices) > 0: # Extract the content from the response if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'): token_text = chunk.choices[0].delta.content if token_text: print(token_text, end="", flush=True) response += token_text yield response print() except Exception as e: print(f"Error during inference: {e}") response += f"\nError: {str(e)}" yield response print("Completed response generation.") # Function to validate provider selection based on BYOK def validate_provider(api_key, provider): if not api_key.strip() and provider != "hf-inference": return gr.update(value="hf-inference") return gr.update(value=provider) # GRADIO UI with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo: # Create the chatbot component chatbot = gr.Chatbot( height=600, show_copy_button=True, placeholder="Select a model and begin chatting. 
        placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
        layout="panel"
    )
    print("Chatbot interface created.")

    # Multimodal textbox for messages (combines text and file uploads)
    msg = gr.MultimodalTextbox(
        placeholder="Type a message or upload images...",
        show_label=False,
        container=False,
        scale=12,
        file_types=["image"],
        file_count="multiple",
        sources=["upload"]
    )

    # Create accordion for settings
    with gr.Accordion("Settings", open=False):
        # System message
        system_message_box = gr.Textbox(
            value="You are a helpful AI assistant that can understand images and text.",
            placeholder="You are a helpful assistant.",
            label="System Prompt"
        )

        # Generation parameters
        with gr.Row():
            with gr.Column():
                max_tokens_slider = gr.Slider(
                    minimum=1,
                    maximum=4096,
                    value=512,
                    step=1,
                    label="Max tokens"
                )
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=4.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-P"
                )
            with gr.Column():
                frequency_penalty_slider = gr.Slider(
                    minimum=-2.0,
                    maximum=2.0,
                    value=0.0,
                    step=0.1,
                    label="Frequency Penalty"
                )
                seed_slider = gr.Slider(
                    minimum=-1,
                    maximum=65535,
                    value=-1,
                    step=1,
                    label="Seed (-1 for random)"
                )

        # Provider selection
        providers_list = [
            "hf-inference",  # Default Hugging Face Inference
            "cerebras",      # Cerebras provider
            "together",      # Together AI
            "sambanova",     # SambaNova
            "novita",        # Novita AI
            "cohere",        # Cohere
            "fireworks-ai",  # Fireworks AI
            "hyperbolic",    # Hyperbolic
            "nebius",        # Nebius
        ]

        provider_radio = gr.Radio(
            choices=providers_list,
            value="hf-inference",
            label="Inference Provider",
        )

        # New BYOK textbox
        byok_textbox = gr.Textbox(
            value="",
            label="BYOK (Bring Your Own Key)",
            info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
            placeholder="Enter your Hugging Face API token",
            type="password"  # Hide the API key for security
        )

        # Custom model box
        custom_model_box = gr.Textbox(
            value="",
            label="Custom Model",
            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
            placeholder="meta-llama/Llama-3.3-70B-Instruct"
        )

        # Model search
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1
        )

        # Featured models list
        models_list = [
            "meta-llama/Llama-3.2-11B-Vision-Instruct",
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.1-70B-Instruct",
            "meta-llama/Llama-3.0-70B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "NousResearch/Hermes-3-Llama-3.1-8B",
            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.3",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "Qwen/Qwen3-235B-A22B",
            "Qwen/Qwen3-32B",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-0.5B-Instruct",
            "Qwen/QwQ-32B",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "microsoft/Phi-3.5-mini-instruct",
            "microsoft/Phi-3-mini-128k-instruct",
            "microsoft/Phi-3-mini-4k-instruct",
        ]

        featured_model_radio = gr.Radio(
            label="Select a model below",
            choices=models_list,
            value="meta-llama/Llama-3.2-11B-Vision-Instruct",  # Default to a multimodal model
            interactive=True
        )

        gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")

    # MCP Support Information Accordion
    with gr.Accordion("MCP Support (for LLMs)", open=False):
        gr.Markdown("""
        ### Model Context Protocol (MCP) Support

        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.

        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.

        **To connect an MCP client to this server:**

        1. Ensure this Gradio application is running.
        2. Use the following URL for the MCP server in your client configuration:
           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)

        **Example MCP Client Configuration (`mcp.json` or similar):**
        ```json
        {
          "mcpServers": {
            "serverlessTextgenHub": {
              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
            }
          }
        }
        ```

        **Tool Parameters:** The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).

        * **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
        * It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).

        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
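
        **Inspecting the schema programmatically:** the snippet below is a minimal sketch (not part of this app's own logic) that downloads and pretty-prints the schema using only the Python standard library. It assumes the default local URL above; substitute your Space URL if deployed.

        ```python
        # Minimal sketch: fetch and pretty-print the MCP schema exposed by this app.
        # Assumes the app is running locally on the default port (7860).
        import json
        import urllib.request

        SCHEMA_URL = "http://127.0.0.1:7860/gradio_api/mcp/schema"

        with urllib.request.urlopen(SCHEMA_URL) as resp:
            schema = json.load(resp)

        # Dump the schema so tool names, descriptions, and parameter shapes can be inspected.
        print(json.dumps(schema, indent=2))
        ```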
""") # Chat history state chat_history = gr.State([]) # Function to filter models def filter_models(search_term): print(f"Filtering models with search term: {search_term}") filtered = [m for m in models_list if search_term.lower() in m.lower()] print(f"Filtered models: {filtered}") return gr.update(choices=filtered) # Function to set custom model from radio def set_custom_model_from_radio(selected): print(f"Featured model selected: {selected}") return selected # Function for the chat interface def user(user_message, history): print(f"User message received: {user_message}") if not user_message or (not user_message.get("text") and not user_message.get("files")): print("Empty message, skipping") return history # Return immediately if message is empty text_content = user_message.get("text", "").strip() files = user_message.get("files", []) print(f"Text content: {text_content}") print(f"Files: {files}") if not text_content and not files: # Check again after stripping text print("No content to display") return history # Append text message first if it exists and is not empty if text_content: print(f"Adding text message: {text_content}") history.append([text_content, None]) # Then append each image file as a separate message if files: for file_path in files: if file_path and isinstance(file_path, str): # Ensure file_path is valid print(f"Adding image: {file_path}") history.append([f"![Image]({file_path})", None]) # Image as a new message return history # Define bot response function def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model): if not history or not history[-1][0]: # Check if history or last message is empty print("No history or empty last message to process for bot") # Yield an empty update or the history itself to avoid errors # depending on how Gradio handles empty yields. # For safety, just return the history if it's in a bad state. yield history return user_message_content = history[-1][0] # This is the user's latest message (text or image markdown) print(f"Bot processing user message content: {user_message_content}") # Determine if the current turn is primarily about an image or text # This logic assumes images are added as separate history entries like "![Image](path)" # and text prompts might precede them or be separate. current_message_text_for_api = "" current_image_files_for_api = [] # Check if the last entry is an image if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("): image_path = user_message_content.replace("![Image](", "").replace(")", "") current_image_files_for_api.append(image_path) print(f"Bot identified image in last history entry: {image_path}") # If it's an image, check the second to last entry for a text prompt if len(history) > 1: prev_content = history[-2][0] if isinstance(prev_content, str) and not prev_content.startswith("![Image]("): current_message_text_for_api = prev_content print(f"Bot identified preceding text for image: {current_message_text_for_api}") else: # Last entry is text current_message_text_for_api = user_message_content print(f"Bot identified text in last history entry: {current_message_text_for_api}") # The history sent to `respond` should not include the current turn's input, # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list. # If an image is present, it's passed via `image_files`. 
        history_for_respond_func = history[:-1]  # Pass history *before* the current turn

        history[-1][1] = ""  # Initialize assistant's response for the current turn

        for response_chunk in respond(
            message=current_message_text_for_api,
            image_files=current_image_files_for_api,
            history=history_for_respond_func,  # Pass prior history
            system_message=system_msg,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=freq_penalty,
            seed=seed,
            provider=provider,
            custom_api_key=api_key,
            custom_model=custom_model,
            model_search_term=search_term,  # Though these two might not be directly used by respond if model is fixed
            selected_model=selected_model
        ):
            history[-1][1] = response_chunk
            yield history

    # Event handlers
    msg.submit(
        user,
        [msg, chatbot],
        [chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_message_box, max_tokens_slider, temperature_slider,
         top_p_slider, frequency_penalty_slider, seed_slider, provider_radio,
         byok_textbox, custom_model_box, model_search_box, featured_model_radio],
        [chatbot]
    ).then(
        lambda: {"text": "", "files": []},  # Clear inputs after submission
        None,
        [msg]
    )

    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio
    )
    print("Model search box change event linked.")

    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box
    )
    print("Featured model radio button change event linked.")

    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("BYOK textbox change event linked.")

    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("Provider radio button change event linked.")

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=True, mcp_server=True)  # MCP SERVER ENABLED HERE