import os
import requests

url = "https://huggingface.co/InstantX/SD3.5-Large-IP-Adapter/resolve/main/ip-adapter.bin"
file_path = "ip-adapter.bin"

# Fetch the IP-Adapter checkpoint once; later runs reuse the local copy.
if not os.path.exists(file_path):
    print("File not found, downloading...")
    # Stream into a temporary name first so an interrupted transfer never
    # leaves a truncated file that the existence check above would accept.
    partial_path = file_path + ".part"
    try:
        # (connect, read) timeouts keep a stalled connection from hanging startup.
        with requests.get(url, stream=True, timeout=(10, 300)) as response:
            # Fail loudly on 4xx/5xx instead of saving an error body as the checkpoint.
            response.raise_for_status()
            with open(partial_path, "wb") as file:
                # 1 MiB chunks: far fewer Python-level writes than 1 KiB chunks.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file.write(chunk)
        # Atomic rename: the final path only ever holds a complete download.
        os.replace(partial_path, file_path)
    finally:
        # After a successful rename the partial file is gone and this is a no-op;
        # after a failure it removes the incomplete data.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download completed!")
else:
    print("File already exists.")

from models.transformer_sd3 import SD3Transformer2DModel
import gradio as gr
import torch
from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
import os
from PIL import Image
import spaces
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the HF_TOKEN environment variable.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)
else:
    # login(token=None) falls back to an interactive prompt, which stalls a
    # headless start-up; skip it and rely on any previously stored credentials.
    print("HF_TOKEN is not set; skipping Hugging Face login.")

# Locations of the three model components used below.
model_path = "stabilityai/stable-diffusion-3.5-large"
ip_adapter_path = "./ip-adapter.bin"
image_encoder_path = "google/siglip-so400m-patch14-384"

# Load the transformer through the project's own SD3Transformer2DModel class,
# in bfloat16, from the "transformer" subfolder of the base model.
transformer = SD3Transformer2DModel.from_pretrained(
    model_path,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
)

# Build the pipeline around that transformer, then move it to the GPU.
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe = pipe.to("cuda")

# Attach the IP-Adapter checkpoint and its image encoder to the pipeline.
pipe.init_ipadapter(
    ip_adapter_path=ip_adapter_path,
    image_encoder_path=image_encoder_path,
    nb_token=64,
)


@spaces.GPU
def gui_generation(prompt, ref_img, guidance_scale, ipadapter_scale):
    """Generate one 1024x1024 image from a text prompt and a reference image.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        ref_img: Filesystem path of the reference image, or ``None`` when no
            image has been uploaded.
        guidance_scale: Forwarded as the pipeline's ``guidance_scale``.
        ipadapter_scale: Forwarded as the pipeline's ``ipadapter_scale``.

    Returns:
        The first entry of the pipeline result's ``images``.

    Raises:
        gr.Error: If no reference image was provided.
    """
    if not ref_img:
        # Without this guard Image.open(None) fails with an opaque error;
        # gr.Error surfaces a readable message in the UI instead.
        raise gr.Error("Please upload a reference image before generating.")

    # Open inside a context manager so the file handle is released promptly;
    # convert() returns an independent, fully loaded RGB copy.
    with Image.open(ref_img) as source:
        reference = source.convert("RGB")

    with torch.no_grad():
        images = pipe(
            width=1024,
            height=1024,
            prompt=prompt,
            negative_prompt="lowres, low quality, worst quality",
            num_inference_steps=24,
            guidance_scale=guidance_scale,
            # Fixed seed: the generator is re-seeded with 42 on every call.
            generator=torch.Generator("cuda").manual_seed(42),
            clip_image=reference,
            ipadapter_scale=ipadapter_scale,
        ).images
    return images[0]

# Build the Gradio UI: a prompt box, a reference-image upload and two sliders
# feed gui_generation; its result is shown in a single image component.
with gr.Blocks() as demo:
    gr.Markdown("# Stable Diffusion 3.5 Image Generation")

    with gr.Row():
        prompt_box = gr.Textbox(
            placeholder="Enter your image generation prompt",
            label="Prompt",
        )

    with gr.Row():
        # type="filepath" hands the callback a path string rather than pixel data.
        ref_img = gr.Image(label="Upload Reference Image", type="filepath")

    with gr.Row():
        guidance_slider = gr.Slider(
            minimum=2,
            maximum=16,
            step=0.5,
            value=7,
            label="Guidance Scale",
            info="Controls adherence to the text prompt",
        )
        ipadapter_slider = gr.Slider(
            minimum=0,
            maximum=1,
            step=0.1,
            value=0.5,
            label="IP-Adapter Scale",
            info="Controls influence of the image prompt",
        )

    generate_btn = gr.Button("Generate")
    gallery = gr.Image(label="Generated Image", type="pil")

    # Order must match gui_generation's positional parameters.
    generation_inputs = [prompt_box, ref_img, guidance_slider, ipadapter_slider]
    generate_btn.click(
        fn=gui_generation,
        inputs=generation_inputs,
        outputs=gallery,
    )

demo.launch()