Spaces:

Nick088
/

stable-diffusion-arena

Running on Zero

File size: 20,226 Bytes

import torch
from diffusers import StableDiffusion3Pipeline, StableDiffusionPipeline, StableDiffusionXLPipeline, DPMSolverSinglestepScheduler
import gradio as gr
import os
import random
import numpy as np
import spaces

# Hugging Face access token taken from the environment (None when the
# variable is unset). Nothing else in the visible code reads it.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Pick the torch device name once at import time and report the choice.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using GPU" if device == "cuda" else "Using CPU")


# Upper bound for the seed sliders: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Build one pipeline object per supported model at import time.
def _load_offloaded_pipeline(pipeline_cls, repo_id):
    """Load ``repo_id`` through ``pipeline_cls`` in float16 and call
    ``enable_model_cpu_offload()`` on the result before returning it."""
    pipeline = pipeline_cls.from_pretrained(repo_id, torch_dtype=torch.float16)
    pipeline.enable_model_cpu_offload()
    return pipeline


sd3_medium_pipe = _load_offloaded_pipeline(
    StableDiffusion3Pipeline,
    "stabilityai/stable-diffusion-3-medium-diffusers",
)

sd2_1_pipe = _load_offloaded_pipeline(
    StableDiffusionPipeline,
    "stabilityai/stable-diffusion-2-1",
)

sdxl_pipe = _load_offloaded_pipeline(
    StableDiffusionXLPipeline,
    "stabilityai/stable-diffusion-xl-base-1.0",
)

sdxl_flash_pipe = _load_offloaded_pipeline(
    StableDiffusionXLPipeline,
    "sd-community/sdxl-flash",
)
# SDXL Flash only: replace its scheduler with a DPMSolverSinglestepScheduler
# built from the existing scheduler config, using "trailing" timestep spacing.
sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(
    sdxl_flash_pipe.scheduler.config,
    timestep_spacing="trailing",
)

# Helper function to generate images for a single model
@spaces.GPU(duration=80)
def generate_single_image(
    prompt,
    negative_prompt,
    num_inference_steps,
    height,
    width,
    guidance_scale,
    seed,
    num_images_per_prompt,
    model_choice,
    generator,
):
    """Run one of the preloaded pipelines and return its generated images.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline unchanged.
        height: Forwarded to the pipeline unchanged.
        width: Forwarded to the pipeline unchanged.
        guidance_scale: Forwarded to the pipeline unchanged.
        seed: Part of the signature but not read in this function;
            randomness is driven by ``generator``.
        num_images_per_prompt: Forwarded to the pipeline unchanged.
        model_choice: One of "sd3 medium", "sd2.1", "sdxl", "sdxl flash".
        generator: Random generator object forwarded to the pipeline.

    Returns:
        The ``images`` attribute of the pipeline call's result.

    Raises:
        ValueError: If ``model_choice`` matches none of the known labels.
    """
    # Label -> pipeline pairs, checked in order with plain equality.
    known_pipelines = (
        ("sd3 medium", sd3_medium_pipe),
        ("sd2.1", sd2_1_pipe),
        ("sdxl", sdxl_pipe),
        ("sdxl flash", sdxl_flash_pipe),
    )
    for label, candidate in known_pipelines:
        if model_choice == label:
            pipe = candidate
            break
    else:
        raise ValueError(f"Invalid model choice: {model_choice}")

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        generator=generator,
        num_images_per_prompt=num_images_per_prompt,
    )
    return result.images

# Define the image generation function for the Arena tab
@spaces.GPU(duration=80)
def generate_arena_images(
    prompt,
    negative_prompt,
    num_inference_steps,
    height,
    width,
    guidance_scale,
    seed,
    num_images_per_prompt,
    model_choice_A,
    model_choice_B,
    use_same_settings,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate images for two models from the same prompt and settings.

    Args:
        prompt: Text prompt shared by both models.
        negative_prompt: Negative prompt shared by both models.
        num_inference_steps: Forwarded to ``generate_single_image``.
        height: Forwarded to ``generate_single_image``.
        width: Forwarded to ``generate_single_image``.
        guidance_scale: Forwarded to ``generate_single_image``.
        seed: Seed for the random generators; ``0`` means "draw a random
            seed" from ``[1, 2**32 - 1]``.
        num_images_per_prompt: Forwarded to ``generate_single_image``.
        model_choice_A: Model label used for the first result.
        model_choice_B: Model label used for the second result.
        use_same_settings: Accepted from the UI but not read here.
        progress: Gradio progress object declared with ``track_tqdm=True``;
            not referenced in the body.

    Returns:
        A ``(images_A, images_B)`` tuple, one entry per model.
    """
    if seed == 0:
        seed = random.randint(1, 2**32 - 1)

    def _generate_for(model_choice):
        # Each model gets its own freshly seeded generator. Sharing a single
        # generator would let the first model's sampling advance the RNG
        # state, so the second model would not start from the requested seed.
        generator = torch.Generator().manual_seed(seed)
        return generate_single_image(
            prompt,
            negative_prompt,
            num_inference_steps,
            height,
            width,
            guidance_scale,
            seed,
            num_images_per_prompt,
            model_choice,
            generator,
        )

    images_A = _generate_for(model_choice_A)
    images_B = _generate_for(model_choice_B)

    return images_A, images_B

# Define the image generation function for the Individual tab
@spaces.GPU(duration=80)
def generate_individual_image(
    prompt,
    negative_prompt,
    num_inference_steps,
    height,
    width,
    guidance_scale,
    seed,
    num_images_per_prompt,
    model_choice,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate images with a single model for the Individual tab.

    Args:
        prompt: Text prompt for the model.
        negative_prompt: Negative prompt for the model.
        num_inference_steps: Forwarded to ``generate_single_image``.
        height: Forwarded to ``generate_single_image``.
        width: Forwarded to ``generate_single_image``.
        guidance_scale: Forwarded to ``generate_single_image``.
        seed: Seed for the random generator; ``0`` means "draw a random
            seed" from ``[1, 2**32 - 1]``.
        num_images_per_prompt: Forwarded to ``generate_single_image``.
        model_choice: Label of the model to run.
        progress: Gradio progress object declared with ``track_tqdm=True``;
            not referenced in the body.

    Returns:
        Whatever ``generate_single_image`` returns for the chosen model.
    """
    # A seed of exactly 0 is the UI's way of asking for a random one.
    seed = random.randint(1, 2**32 - 1) if seed == 0 else seed

    rng = torch.Generator().manual_seed(seed)

    return generate_single_image(
        prompt,
        negative_prompt,
        num_inference_steps,
        height,
        width,
        guidance_scale,
        seed,
        num_images_per_prompt,
        model_choice,
        rng,
    )


# Create the Gradio interface
# Example prompts offered in both tabs; each row holds only the prompt text.
_EXAMPLE_PROMPTS = (
    "A white car racing fast to the moon.",
    "A woman in a red dress singing on top of a building.",
    "An astronaut on mars in a futuristic cyborg suit.",
)
examples = [[text] for text in _EXAMPLE_PROMPTS]

# Stylesheet handed to gr.Blocks: caps the container width and centres <h1>.
css = "\n".join(
    (
        "",
        ".gradio-container{max-width: 1000px !important}",
        "h1{text-align:center}",
        "",
    )
)
# Literals shared by the components below, hoisted so each is spelled once.
MODEL_CHOICES = ["sd3 medium", "sd2.1", "sdxl", "sdxl flash"]
DEFAULT_NEGATIVE_PROMPT = "deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation"
NEGATIVE_PROMPT_INFO = "Describe what you don't want in the image"
STEPS_INFO = "The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference"
GUIDANCE_INFO = "Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text."
SEED_INFO = "A starting point to initiate the generation process, put 0 for a random one"

with gr.Blocks(css=css) as demo:
    # Page header: title plus author/Discord links.
    with gr.Row():
        with gr.Column():
            gr.HTML(
                """
            <h1 style='text-align: center'>
            Stable Diffusion Arena
            </h1>
            """
            )
            gr.HTML(
                """
               Made by <a href='https://linktr.ee/Nick088' target='_blank'>Nick088</a>
               <br> <a href="https://discord.gg/osai"> <img src="https://img.shields.io/discord/1198701940511617164?color=%23738ADB&label=Discord&style=for-the-badge" alt="Discord"> </a>
                """
            )
    with gr.Tabs():
        # ---- Arena tab: two models side by side -------------------------
        with gr.TabItem("Arena"):
            with gr.Group():
                with gr.Column():
                    prompt = gr.Textbox(
                        label="Prompt",
                        info="Describe the image you want",
                        placeholder="A cat...",
                    )
                    model_choice_A = gr.Dropdown(
                        label="Stable Diffusion Model A",
                        choices=MODEL_CHOICES,
                        value="sd3 medium",
                    )
                    model_choice_B = gr.Dropdown(
                        label="Stable Diffusion Model B",
                        choices=MODEL_CHOICES,
                        value="sdxl",
                    )
                    run_button = gr.Button("Run")
                result_A = gr.Gallery(label="Generated Images (Model A)", elem_id="gallery_A")
                result_B = gr.Gallery(label="Generated Images (Model B)", elem_id="gallery_B")
            with gr.Accordion("Advanced options", open=False):
                use_same_settings = gr.Checkbox(label='Use same settings for both models', value=True)

                # Shared settings: these are the values sent to the callback.
                with gr.Row(visible=True):
                    negative_prompt = gr.Textbox(
                        label="Negative Prompt",
                        info=NEGATIVE_PROMPT_INFO,
                        value=DEFAULT_NEGATIVE_PROMPT,
                        placeholder="Ugly, bad anatomy...",
                    )
                with gr.Row(visible=True):
                    num_inference_steps = gr.Slider(
                        label="Number of Inference Steps",
                        info=STEPS_INFO,
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        info=GUIDANCE_INFO,
                        minimum=0.0,
                        maximum=10.0,
                        value=7.5,
                        step=0.1,
                    )
                with gr.Row(visible=True):
                    width = gr.Slider(
                        label="Width",
                        info="Width of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                    height = gr.Slider(
                        label="Height",
                        info="Height of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                with gr.Row(visible=True):
                    seed = gr.Slider(
                        value=42,
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        label="Seed",
                        info=SEED_INFO,
                    )
                    num_images_per_prompt = gr.Slider(
                        label="Images Per Prompt",
                        info="Number of Images to generate with the settings",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=2,
                    )

                # Per-model settings.
                # NOTE(review): every row below is created with visible=False,
                # and nothing in this block toggles that visibility or passes
                # these components to generate_arena_images, so they are
                # currently inert placeholders.
                with gr.Row(visible=False):
                    negative_prompt_A = gr.Textbox(
                        label="Negative Prompt (Model A)",
                        info=NEGATIVE_PROMPT_INFO,
                        value=DEFAULT_NEGATIVE_PROMPT,
                        placeholder="Ugly, bad anatomy...",
                    )
                    negative_prompt_B = gr.Textbox(
                        label="Negative Prompt (Model B)",
                        info=NEGATIVE_PROMPT_INFO,
                        value=DEFAULT_NEGATIVE_PROMPT,
                        placeholder="Ugly, bad anatomy...",
                    )
                with gr.Row(visible=False):
                    num_inference_steps_A = gr.Slider(
                        label="Number of Inference Steps (Model A)",
                        info=STEPS_INFO,
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                    )
                    num_inference_steps_B = gr.Slider(
                        label="Number of Inference Steps (Model B)",
                        info=STEPS_INFO,
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                    )
                with gr.Row(visible=False):
                    width_A = gr.Slider(
                        label="Width (Model A)",
                        info="Width of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                    width_B = gr.Slider(
                        label="Width (Model B)",
                        info="Width of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                with gr.Row(visible=False):
                    height_A = gr.Slider(
                        label="Height (Model A)",
                        info="Height of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                    height_B = gr.Slider(
                        label="Height (Model B)",
                        info="Height of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                with gr.Row(visible=False):
                    guidance_scale_A = gr.Slider(
                        label="Guidance Scale (Model A)",
                        info=GUIDANCE_INFO,
                        minimum=0.0,
                        maximum=10.0,
                        value=7.5,
                        step=0.1,
                    )
                    guidance_scale_B = gr.Slider(
                        label="Guidance Scale (Model B)",
                        info=GUIDANCE_INFO,
                        minimum=0.0,
                        maximum=10.0,
                        value=7.5,
                        step=0.1,
                    )
                with gr.Row(visible=False):
                    seed_A = gr.Slider(
                        value=42,
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        label="Seed (Model A)",
                        info=SEED_INFO,
                    )
                    seed_B = gr.Slider(
                        value=42,
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        label="Seed (Model B)",
                        info=SEED_INFO,
                    )
                with gr.Row(visible=False):
                    num_images_per_prompt_A = gr.Slider(
                        label="Images Per Prompt (Model A)",
                        info="Number of Images to generate with the settings",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=2,
                    )
                    num_images_per_prompt_B = gr.Slider(
                        label="Images Per Prompt (Model B)",
                        info="Number of Images to generate with the settings",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=2,
                    )

            # The example rows carry only a prompt, while the callback needs
            # the full settings list. Caching is switched off explicitly so
            # Gradio never invokes `fn` with just the prompt; clicking an
            # example only fills the prompt box.
            gr.Examples(
                examples=examples,
                inputs=[prompt],
                outputs=[result_A, result_B],
                fn=generate_arena_images,
                cache_examples=False,
            )

            # Input order must match generate_arena_images' positional
            # parameters (..., height, width, ...).
            gr.on(
                triggers=[
                    prompt.submit,
                    run_button.click,
                ],
                fn=generate_arena_images,
                inputs=[
                    prompt,
                    negative_prompt,
                    num_inference_steps,
                    height,
                    width,
                    guidance_scale,
                    seed,
                    num_images_per_prompt,
                    model_choice_A,
                    model_choice_B,
                    use_same_settings,
                ],
                outputs=[result_A, result_B],
            )

        # ---- Individual tab: a single model -----------------------------
        # The names prompt, run_button, negative_prompt, etc. are rebound
        # here to this tab's own components; the Arena handlers above were
        # registered with the earlier objects and are unaffected.
        with gr.TabItem("Individual"):
            with gr.Group():
                with gr.Column():
                    prompt = gr.Textbox(
                        label="Prompt",
                        info="Describe the image you want",
                        placeholder="A cat...",
                    )
                    model_choice = gr.Dropdown(
                        label="Stable Diffusion Model",
                        choices=MODEL_CHOICES,
                        value="sd3 medium",
                    )
                    run_button = gr.Button("Run")
                result = gr.Gallery(label="Generated AI Images", elem_id="gallery")
            with gr.Accordion("Advanced options", open=False):
                with gr.Row():
                    negative_prompt = gr.Textbox(
                        label="Negative Prompt",
                        info=NEGATIVE_PROMPT_INFO,
                        value=DEFAULT_NEGATIVE_PROMPT,
                        placeholder="Ugly, bad anatomy...",
                    )
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        label="Number of Inference Steps",
                        info=STEPS_INFO,
                        minimum=1,
                        maximum=50,
                        value=25,
                        step=1,
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        info=GUIDANCE_INFO,
                        minimum=0.0,
                        maximum=10.0,
                        value=7.5,
                        step=0.1,
                    )
                with gr.Row():
                    width = gr.Slider(
                        label="Width",
                        info="Width of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                    height = gr.Slider(
                        label="Height",
                        info="Height of the Image",
                        minimum=256,
                        maximum=1344,
                        step=32,
                        value=1024,
                    )
                with gr.Row():
                    seed = gr.Slider(
                        value=42,
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        label="Seed",
                        info=SEED_INFO,
                    )
                    num_images_per_prompt = gr.Slider(
                        label="Images Per Prompt",
                        info="Number of Images to generate with the settings",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=2,
                    )

            # Same reasoning as in the Arena tab: examples only supply the
            # prompt, so caching (which would call `fn`) is disabled.
            gr.Examples(
                examples=examples,
                inputs=[prompt],
                outputs=[result],
                fn=generate_individual_image,
                cache_examples=False,
            )

            # Input order must match generate_individual_image's positional
            # parameters: height comes before width.
            gr.on(
                triggers=[
                    prompt.submit,
                    run_button.click,
                ],
                fn=generate_individual_image,
                inputs=[
                    prompt,
                    negative_prompt,
                    num_inference_steps,
                    height,
                    width,
                    guidance_scale,
                    seed,
                    num_images_per_prompt,
                    model_choice,
                ],
                outputs=[result],
            )

demo.queue().launch(share=False)