import re
import base64
import io
import torch
import gradio as gr
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Load the model and processor
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)

def generate_model_response(image_file, user_query):
    """
    Processes the uploaded image and user query to generate a response from the model.
    
    Parameters:
    - image_file: The uploaded image file.
    - user_query: The user's question about the image.

    Returns:
    - str: The generated response from the model, formatted as HTML.
    """
    try:
        # Load and prepare the image
        raw_image = Image.open(image_file).convert("RGB")
        
        # Prepare input for the model using the processor
        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": "<|image|>"},  # Placeholder for image
                    {"type": "text", "text": user_query}
                ]
            }
        ]

        # Apply chat template to prepare inputs for the model
        inputs = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
        
        # Process the image and text inputs together
        inputs = processor(inputs, raw_image, return_tensors="pt").to(model.device)

        # Generate response from the model
        outputs = model.generate(**inputs)
        
        # Decode and format the response
        generated_text = processor.decode(outputs[0], skip_special_tokens=True)
        
        return generated_text
    
    except Exception as e:
        print(f"Error in generating response: {e}")
        return f"<p>An error occurred: {str(e)}</p>"

# Gradio Interface
iface = gr.Interface(
    fn=generate_model_response,
    inputs=[
        gr.Image(type="file", label="Upload Image"),
        gr.Textbox(label="Enter your question", placeholder="How many calories are in this food?")
    ],
    outputs=gr.HTML(label="Response from Model"),
)

iface.launch(share=True)