import gradio as gr
import torch
import numpy as np
from diffusers import DiffusionPipeline
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from functools import lru_cache
from PIL import Image

from transformers import CLIPImageProcessor  # Updated per deprecation warning


@lru_cache(maxsize=1)
def load_pipeline():
    """Load the FLUX pipeline, its LoRA weights and the safety-check models.

    The result is memoized with ``lru_cache``, so repeated calls return the
    same objects instead of reloading the weights.

    Returns:
        A ``(pipe, safety_checker, image_processor)`` tuple: the diffusion
        pipeline moved to the selected device, the Stable Diffusion safety
        checker, and the CLIP image processor used to build its input.
    """
    # bfloat16 on CUDA, float32 on CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch_dtype = torch.bfloat16 if device.type == "cuda" else torch.float32

    base_model = "black-forest-labs/FLUX.1-dev"
    pipe = DiffusionPipeline.from_pretrained(
        base_model,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,  # Reduce memory usage during load
    )

    # Load LoRA weights
    lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
    pipe.load_lora_weights(lora_repo)

    # Load safety checker and image processor. They are not moved to
    # `device`; they stay wherever from_pretrained places them.
    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    )
    image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

    if device.type == "cuda":
        # Best-effort optimization: keep going if xformers is unavailable.
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception as e:
            print("Warning: Could not enable xformers memory efficient attention:", e)
    # No offloading on a CPU-only host: sequential CPU offload streams weights
    # from host memory to an accelerator on demand, so without an accelerator
    # it either fails outright or installs hooks that target a missing device.

    pipe = pipe.to(device)
    return pipe, safety_checker, image_processor

# Load the models once at import time; generate_image reads these
# module-level names (pipe, safety_checker, image_processor) on every request.
pipe, safety_checker, image_processor = load_pipeline()

def generate_image(
    prompt,
    seed=42,
    width=512,   # Lowered default resolution
    height=512,  # Lowered default resolution
    guidance_scale=6,
    steps=28,
    progress=gr.Progress()  # Gradio injects the progress tracker via this default
):
    """Generate one image for ``prompt`` and run it through the safety checker.

    Args:
        prompt: Text prompt; "Super Realism, " is prepended when the trigger
            phrase is not already present (case-insensitive).
        seed: Seed for the torch random generator.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        progress: Gradio progress tracker used to report status.

    Returns:
        An ``(image, status)`` tuple. On success ``image`` is the generated
        PIL image. If the safety checker flags it, or any exception is
        raised, ``image`` is a blank 512x512 RGB placeholder and ``status``
        explains why.
    """
    try:
        progress(0, desc="Initializing...")

        # Slider values can arrive as floats, but torch's manual_seed and the
        # pipeline's step count / dimensions need real integers.
        seed = int(seed)
        width = int(width)
        height = int(height)
        steps = int(steps)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        generator = torch.Generator(device=device).manual_seed(seed)

        # Auto-add the trigger word if not already present
        if "super realism" not in prompt.lower():
            prompt = f"Super Realism, {prompt}"

        def update_progress(_pipeline, step, _timestep, callback_kwargs):
            # Per-step hook in the `callback_on_step_end` form: it receives
            # the pipeline and must hand the kwargs dict back.
            progress((step + 1) / steps, desc="Generating image...")
            return callback_kwargs

        with torch.inference_mode():
            # The FLUX pipeline has no legacy `callback` parameter; passing it
            # raises TypeError, so the step hook goes via callback_on_step_end.
            result = pipe(
                prompt=prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=steps,
                generator=generator,
                callback_on_step_end=update_progress,
            )
            image = result.images[0]

        progress(1, desc="Safety checking...")
        safety_input = image_processor(image, return_tensors="pt")
        np_image = np.array(image)

        # Run safety checker; it returns a tuple where the second element is nsfw flags
        _, nsfw_detected = safety_checker(
            images=[np_image],
            clip_input=safety_input.pixel_values
        )

        if nsfw_detected[0]:
            return Image.new("RGB", (512, 512)), "NSFW content detected"

        return image, "Generation successful"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting the request crash.
        return Image.new("RGB", (512, 512)), f"Error: {str(e)}"

# Build the UI: inputs in the left column, result image and status on the right.
with gr.Blocks() as app:
    gr.Markdown("# Flux Super Realism Generator")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
            # Every slider sets `step` explicitly. When it is omitted Gradio
            # derives a step from the range, which can make the default seed
            # unreachable and allow fractional step counts.
            seed_input = gr.Slider(0, 1000, value=42, step=1, label="Seed")
            # Lower the resolution slider range for less memory-intensive generation.
            # NOTE(review): step=16 assumes the FLUX pipeline works on
            # multiple-of-16 dimensions; confirm against the pipeline docs.
            width_input = gr.Slider(256, 1024, value=512, step=16, label="Width")
            height_input = gr.Slider(256, 1024, value=512, step=16, label="Height")
            guidance_input = gr.Slider(1, 20, value=6, step=0.1, label="Guidance Scale")
            steps_input = gr.Slider(10, 100, value=28, step=1, label="Steps")
            submit = gr.Button("Generate")

        with gr.Column():
            output_image = gr.Image(label="Result", type="pil")
            status = gr.Textbox(label="Status")

    # Input order must match generate_image's positional parameters.
    submit.click(
        generate_image,
        inputs=[prompt_input, seed_input, width_input, height_input, guidance_input, steps_input],
        outputs=[output_image, status]
    )

# Use queue without GPU-specific parameters; launch once the layout is complete.
app.queue(max_size=3).launch()