import gradio as gr
import numpy as np
import random
import spaces
import torch
from PIL import Image
from torchvision import transforms
from diffusers import DiffusionPipeline

# Define constants
# Upper bound for user-selectable and randomly drawn seeds (max signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
# Smallest, default and largest accepted values for the output width and height.
MIN_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, MAX_IMAGE_SIZE = 256, 1024, 2048
# Longest prompt, in characters, accepted by validate_inputs.
MAX_PROMPT_LENGTH = 500

# Pick the compute device and a matching tensor dtype in one place:
# half precision on CUDA, full precision on CPU.
# NOTE(review): upstream FLUX examples typically load in bfloat16 — confirm
# that float16 is the intended GPU dtype here.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.float16
else:
    device = "cpu"
    dtype = torch.float32
    print("Warning: Running on CPU. This may be very slow.")

def load_model():
    """Load the FLUX.1-schnell diffusion pipeline and move it to ``device``.

    Returns:
        The pipeline produced by ``DiffusionPipeline.from_pretrained``, loaded
        with the module-level ``dtype`` and moved to the module-level
        ``device``.

    Raises:
        RuntimeError: If loading the pipeline or moving it to the device
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        pipeline = DiffusionPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
        )
        return pipeline.to(device)
    except Exception as e:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause instead of "during handling ... another exception".
        raise RuntimeError(f"Failed to load the model: {e}") from e

# Load the diffusion pipeline once at import time; infer() reads this
# module-level object on every call. load_model() raises RuntimeError if the
# pipeline cannot be loaded, which aborts the import.
pipe = load_model()

def preprocess_image(image, target_size=(512, 512)):
    """Turn an input image into a normalized, batched tensor for the VAE.

    Args:
        image: The image to convert (infer passes an RGB PIL image).
        target_size: Size handed to ``transforms.Resize``; infer passes
            ``(height, width)``.

    Returns:
        A tensor with a leading batch dimension of 1, moved to the
        module-level ``device`` and cast to the module-level ``dtype``.
    """
    # Same three steps the pipeline needs, applied one after another:
    # Lanczos resize, conversion to a tensor, then mean/std 0.5 normalization.
    resize = transforms.Resize(
        target_size, interpolation=transforms.InterpolationMode.LANCZOS
    )
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.5], [0.5])

    tensor = normalize(to_tensor(resize(image)))
    batch = tensor.unsqueeze(0)  # add the batch dimension
    return batch.to(device, dtype=dtype)

def encode_image(image, vae):
    """Encode an image batch into scaled latents using the given VAE.

    The latent scale and shift are read from ``vae.config`` instead of being
    hard-coded, so the result matches the VAE actually in use. Latents are
    computed as ``(sample - shift_factor) * scaling_factor``; the shift is
    skipped when the config does not define one.

    Args:
        image: Preprocessed image tensor accepted by ``vae.encode``.
        vae: Object exposing ``encode(image).latent_dist.sample()``.
            ``vae.config.scaling_factor`` and ``vae.config.shift_factor`` are
            used when present.

    Returns:
        The sampled latents after shift and scale. When the VAE provides no
        ``scaling_factor``, the previous default of 0.18215 is used.
    """
    vae_config = getattr(vae, "config", None)
    scaling_factor = getattr(vae_config, "scaling_factor", None)
    if scaling_factor is None:
        # Fall back to the constant this function used before it read the config.
        scaling_factor = 0.18215
    shift_factor = getattr(vae_config, "shift_factor", None)

    # Encoding is inference-only; no gradients are needed.
    with torch.no_grad():
        latents = vae.encode(image).latent_dist.sample()
        if shift_factor is not None:
            latents = latents - shift_factor
        latents = latents * scaling_factor
    return latents

def validate_inputs(prompt, width, height, num_inference_steps):
    """Reject generation settings that fall outside the supported limits.

    Checks run in a fixed order and stop at the first failure: prompt
    presence/length, divisibility of both dimensions by 8, dimension range,
    then the inference step count.

    Args:
        prompt: Text prompt; must be non-empty and at most
            ``MAX_PROMPT_LENGTH`` characters.
        width: Requested image width.
        height: Requested image height.
        num_inference_steps: Requested number of inference steps (1 to 50).

    Raises:
        ValueError: Describing the first limit that was violated.
    """
    prompt_invalid = not prompt or len(prompt) > MAX_PROMPT_LENGTH
    if prompt_invalid:
        raise ValueError(f"Prompt must be between 1 and {MAX_PROMPT_LENGTH} characters.")

    sides = (width, height)
    if any(side % 8 != 0 for side in sides):
        raise ValueError("Width and height must be divisible by 8.")

    if any(side < MIN_IMAGE_SIZE or side > MAX_IMAGE_SIZE for side in sides):
        raise ValueError(f"Image dimensions must be between {MIN_IMAGE_SIZE} and {MAX_IMAGE_SIZE}.")

    steps_out_of_range = num_inference_steps < 1 or num_inference_steps > 50
    if steps_out_of_range:
        raise ValueError("Number of inference steps must be between 1 and 50.")

@spaces.GPU()
def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=DEFAULT_IMAGE_SIZE, height=DEFAULT_IMAGE_SIZE, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
    """Generate an image from a prompt, optionally starting from an initial image.

    Args:
        prompt: Text prompt; checked by ``validate_inputs``.
        init_image: Optional PIL image. When given, generation runs through
            the image-to-image variant of the loaded pipeline.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: If true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width; checked by ``validate_inputs``.
        height: Output height; checked by ``validate_inputs``.
        num_inference_steps: Number of inference steps; checked by
            ``validate_inputs``.
        progress: Not used directly in the body; declared with a
            ``gr.Progress(track_tqdm=True)`` default so Gradio can report
            progress for this call.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the integer
        seed that was actually used.

    Raises:
        gr.Error: Wrapping any validation or generation failure, with the
            original exception attached as ``__cause__``.
    """
    try:
        validate_inputs(prompt, width, height, num_inference_steps)

        # Slider values may arrive as floats; validation has already ensured
        # the sizes are whole multiples of 8, so converting to int is lossless
        # and gives torch and the pipeline the integer types they expect.
        width, height = int(width), int(height)
        num_inference_steps = int(num_inference_steps)

        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
        generator = torch.Generator(device=device).manual_seed(seed)

        # Arguments shared by the text-to-image and image-to-image calls.
        generation_kwargs = dict(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=0.0,
        )

        if init_image is None:
            image = pipe(**generation_kwargs).images[0]
        else:
            # The text-to-image pipeline's `latents` argument is meant for
            # pre-generated noise in the pipeline's own packed layout, not for
            # VAE-encoded image latents, so an initial image is routed through
            # the image-to-image pipeline instead. from_pipe reuses the
            # already-loaded components rather than loading the model again.
            # Imported here so the text-to-image path does not depend on it.
            from diffusers import FluxImg2ImgPipeline

            img2img_pipe = FluxImg2ImgPipeline.from_pipe(pipe)
            # Fraction of the noise schedule applied to the initial image:
            # lower values stay closer to it, 1.0 ignores it entirely.
            img2img_strength = 0.6
            image = img2img_pipe(
                image=init_image.convert("RGB"),
                strength=img2img_strength,
                **generation_kwargs,
            ).images[0]

        return image, seed
    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(str(e)) from e

# Example prompts offered in the UI through gr.Examples. Each entry is passed
# to infer as the prompt only, so every other infer argument keeps its default.
examples = [
    "a tiny astronaut hatching from an egg on the moon",
    "a cat holding a sign that says hello world",
    "an anime illustration of a wiener schnitzel",
    "a surreal landscape with floating islands and waterfalls",
    "a steampunk-inspired cityscape at sunset"
]

# CSS styling for the Japanese-inspired interface; passed to gr.Blocks(css=...).
# "#col-container" matches the elem_id given to the main gr.Column.
# NOTE(review): the ".gr-*" class selectors assume Gradio renders elements with
# those class names — confirm against the installed Gradio version.
css = """
body {
    background-color: #fff;
    font-family: 'Noto Sans JP', sans-serif;
    color: #333;
}
#col-container {
    margin: 0 auto;
    max-width: 520px;
    border: 2px solid #000;
    padding: 20px;
    background-color: #f7f7f7;
    border-radius: 10px;
}
.gr-button {
    background-color: #e60012;
    color: #fff;
    border: 2px solid #000;
}
.gr-button:hover {
    background-color: #c20010;
}
.gr-slider, .gr-checkbox, .gr-textbox {
    border: 2px solid #000;
}
.gr-accordion {
    border: 2px solid #000;
    background-color: #fff;
}
.gr-image {
    border: 2px solid #000;
}
"""

# Create the Gradio interface: one styled column holding the prompt row, the
# input/output images, the advanced settings and the example prompts. Both the
# Run button and submitting the prompt box trigger infer.
with gr.Blocks(css=css) as demo:
    
    # elem_id ties this column to the "#col-container" rule in the css string.
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # FLUX.1 [schnell]
        12B param rectified flow transformer distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/) for 4 step generation
        [[blog](https://blackforestlabs.ai/announcing-black-forest-labs/)] [[model](https://huggingface.co/black-forest-labs/FLUX.1-schnell)]
        """)

        # Prompt entry and the button that starts generation.
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder=f"Enter your prompt (max {MAX_PROMPT_LENGTH} characters)",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        # Optional source image (handed to infer as a PIL image) beside the output.
        with gr.Row():
            init_image = gr.Image(label="Initial Image (optional)", type="pil")
            result = gr.Image(label="Result", show_label=False)

        # Collapsed by default; holds the seed, size and step-count controls.
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
            # Checked by default, so infer draws a random seed unless the user
            # unticks this box.
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # step=8 keeps slider values in line with validate_inputs, which
            # requires width and height to be divisible by 8.
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=MIN_IMAGE_SIZE,
                    maximum=MAX_IMAGE_SIZE,
                    step=8,
                    value=DEFAULT_IMAGE_SIZE,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=MIN_IMAGE_SIZE,
                    maximum=MAX_IMAGE_SIZE,
                    step=8,
                    value=DEFAULT_IMAGE_SIZE,
                )

            # Range matches the 1-50 limit enforced by validate_inputs.
            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                )

        # Examples supply only the prompt; infer's defaults cover the rest.
        # NOTE(review): cache_examples="lazy" presumably caches each example's
        # output on first use — confirm against the installed Gradio version.
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples="lazy"
        )

    # Run infer on button click or prompt submit. The seed returned by infer is
    # written back to the seed slider so the seed actually used is visible.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, init_image, seed, randomize_seed, width, height, num_inference_steps],
        outputs=[result, seed]
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()