import json
import re
import time
import uuid
import random
import requests
import torch
import gradio as gr
from threading import Thread
from PIL import Image
from bs4 import BeautifulSoup
from gradio_client import Client
from huggingface_hub import InferenceClient
from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer

# Initialize the local Llava model and processor (CPU inference)
model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
processor = LlavaProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(model_id).to("cpu")

# Initialize inference clients for the hosted text models
client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
client_yi = InferenceClient("01-ai/Yi-1.5-34B-Chat")


def search(query):
    """Performs a Google search and extracts text from the top results."""
    session = requests.Session()
    response = session.get(
        f"https://www.google.com/search?q={query}",
        headers={"User-Agent": "Mozilla/5.0"},
    )
    soup = BeautifulSoup(response.text, "html.parser")
    results = []
    for result in soup.find_all("div", class_="BNeawe vvjwJb AP7Wnd"):
        text = result.get_text()
        # The parent <a> tag may be missing; fall back to an empty link.
        parent_link = result.find_parent("a")
        link = parent_link["href"] if parent_link else ""
        results.append(f"{text}: {link}")
    return "\n".join(results[:3])


def llava(inputs, history):
    """Processes an image and text input with Llava."""
    image = Image.open(inputs["files"][0]).convert("RGB")
    # The interleave-qwen checkpoint expects an <image> placeholder in the prompt.
    prompt = f"<|im_start|>user <image>\n{inputs['text']}<|im_end|><|im_start|>assistant"
    return processor(text=prompt, images=image, return_tensors="pt").to("cpu")


def respond(message, history):
    """Main response function for the chatbot."""
    if "files" in message and message["files"]:
        # Image + text input: stream the reply from the local Llava model.
        inputs = llava(message, history)
        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
        thread = Thread(target=model.generate, kwargs=dict(**inputs, max_new_tokens=512, streamer=streamer))
        thread.start()
        buffer = ""
        for new_text in streamer:
            buffer += new_text
            yield buffer
    else:
        # Text-only input: rebuild the conversation with alternating roles and query the hosted model.
        prompt = []
        for user_msg, bot_msg in history:
            prompt.append({"role": "user", "content": user_msg})
            if bot_msg:
                prompt.append({"role": "assistant", "content": bot_msg})
        prompt.append({"role": "user", "content": message["text"]})
        response = client_gemma.chat_completion(prompt, max_tokens=200)
        yield response.choices[0].message.content


def generate_image(prompt):
    """Generates an image by calling an external Gradio Space."""
    # gradio_client.Client is used here because predict()/api_name is a Space API, not an InferenceClient method.
    client = Client("KingNish/Image-Gen-Pro")
    return client.predict("Image Generation", None, prompt, api_name="/image_gen_pro")


# Set up the Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])  # conversation history as (user, assistant) pairs
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(placeholder="Enter your message...")
            file_input = gr.File(label="Upload an image")
        with gr.Column():
            output = gr.Image(label="Generated Image")
    with gr.Row():
        search_button = gr.Button("Search Google")
        image_button = gr.Button("Generate Image")

    examples = [
        {"text": "Who are you?"},
        {"text": "Generate an image of the Eiffel Tower at night."},
        {"text": "Search for the latest trends on YouTube."},
    ]

    def handle_text(text, history):
        """Streams the assistant reply for a text message into the chatbot."""
        history = history or []
        history.append((text, ""))
        for partial in respond({"text": text}, history[:-1]):
            history[-1] = (text, partial)
            yield history, history

    def handle_file_upload(file, history):
        """Asks the Llava model to describe an uploaded image."""
        history = history or []
        history.append(("[image uploaded]", ""))
        for partial in respond({"files": [file], "text": "Describe this image."}, history[:-1]):
            history[-1] = ("[image uploaded]", partial)
            yield history, history

    # Connect components to callbacks
    text_input.submit(handle_text, [text_input, state], [chatbot, state])
    file_input.change(handle_file_upload, [file_input, state], [chatbot, state])

    # Search button: show the query and the top Google results in the chat window
    search_button.click(lambda query: [(query, search(query))], [text_input], [chatbot])
    # Image button: generate an image from the textbox prompt
    image_button.click(lambda text: generate_image(text), [text_input], [output])

# Launch the Gradio interface
demo.launch()