# stable-diffusion-3.5-large
#
# Running on Zero
#
# File size: 6,018 Bytes

import gradio as gr
import numpy as np
import random

import spaces
from diffusers import DiffusionPipeline
import torch
import io
import base64

import numpy as np
import random
import spaces
import torch
import time
from diffusers import DiffusionPipeline, AutoencoderTiny
from diffusers.models.attention_processor import AttnProcessor2_0
from custom_pipeline import FluxWithCFGPipeline

# Permit TF32 for CUDA matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True

model_repo_id = "stabilityai/stable-diffusion-3.5-large"

# Use the GPU with bfloat16 weights when CUDA is available; otherwise fall
# back to the CPU with float32 weights.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.bfloat16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the pipeline once at import time and place it on the chosen device.
pipe = DiffusionPipeline.from_pretrained(
    model_repo_id, torch_dtype=torch_dtype
).to(device)







def adjust_to_nearest_multiple(value, divisor=8):
    """
    Rounds the input value to the nearest multiple of the divisor.

    Values exactly halfway between two multiples always round up. The
    built-in round() rounds halves to the nearest even quotient instead,
    which would map 12 -> 16 but 20 -> 16 and 4 -> 0 for a divisor of 8.

    Args:
    value (int | float): The value to adjust.
    divisor (int): The divisor the result must be a multiple of. Default is 8.
    Returns:
    int: The nearest multiple of the divisor (ties round up).
    Raises:
    ZeroDivisionError: If divisor is 0.
    """
    # floor(value / divisor + 1/2), written with doubled operands so that
    # integer inputs are handled exactly, without a float division.
    multiple_count = int((2 * value + divisor) // (2 * divisor))
    return multiple_count * divisor

def adjust_dimensions(height, width):
    """
    Snaps a height/width pair to multiples of 8.

    Each dimension is passed on its own through adjust_to_nearest_multiple
    using that function's default divisor.

    Args:
    height (int): Requested height.
    width (int): Requested width.
    Returns:
    tuple: (height, width) after adjustment, in that order.
    """
    snapped = tuple(adjust_to_nearest_multiple(side) for side in (height, width))
    return snapped


# MAX_SEED = np.iinfo(np.int32).max
# MAX_IMAGE_SIZE = 4100


# Largest seed value: the int32 maximum. Used as the upper bound of the seed
# slider and of the random seed drawn in infer().
MAX_SEED = np.iinfo(np.int32).max
# Upper bound for the requested width and height: infer() clamps to it and the
# width/height sliders use it as their maximum.
MAX_IMAGE_SIZE = 1024

def _image_to_webp_data_url(image):
    """Encodes an image as a ``data:image/webp;base64,...`` URL string."""
    buffered = io.BytesIO()
    image.save(buffered, format="WEBP")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/webp;base64,{img_base64}"


@spaces.GPU(duration=100)
def infer(
    prompt,
    negative_prompt="",
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=4.5,
    num_inference_steps=40,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Generates one image for the prompt and returns it with a WebP data URL.

    Args:
    prompt (str): Text prompt passed to the pipeline.
    negative_prompt (str): Negative prompt passed to the pipeline.
    seed (int): Seed for the random generator; ignored when randomize_seed is set.
    randomize_seed (bool): When true, a seed is drawn from [0, MAX_SEED].
    width (int): Requested width; clamped to MAX_IMAGE_SIZE, then adjusted by adjust_dimensions.
    height (int): Requested height; clamped to MAX_IMAGE_SIZE, then adjusted by adjust_dimensions.
    guidance_scale (float): Guidance scale passed to the pipeline.
    num_inference_steps (int): Number of inference steps passed to the pipeline.
    progress (gr.Progress): Gradio progress tracker; not referenced in the body.
    Returns:
    tuple: (image, data_url) where image is the first image produced by the
    pipeline and data_url is that image encoded as a base64 WebP data URL.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    width = min(width, MAX_IMAGE_SIZE)
    height = min(height, MAX_IMAGE_SIZE)
    height, width = adjust_dimensions(height, width)

    # Coerce the numeric inputs to int: values coming from the UI or from API
    # callers may arrive as floats, and the seed must be an integer.
    generator = torch.Generator().manual_seed(int(seed))

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
        generator=generator,
    ).images[0]

    # The data URL holds the entire encoded image, so it is returned to the
    # caller but deliberately not written to stdout.
    img_data_url = _image_to_webp_data_url(image)

    return image, img_data_url


# Example prompts handed to gr.Examples in the UI definition.
examples = [
        "A capybara wearing a suit holding a sign that reads Hello World",
]

# Stylesheet passed to gr.Blocks: centres the element with id "col-container"
# and limits its width to 640px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# UI definition: a prompt box with a Run button, the result image, and an
# "Advanced Settings" accordion holding the generation parameters.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # [Stable Diffusion 3.5 Large (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large)")
        gr.Markdown("[Learn more](https://stability.ai/news/introducing-stable-diffusion-3-5) about the Stable Diffusion 3.5 series. Try on [Stability AI API](https://platform.stability.ai/docs/api-reference#tag/Generate/paths/~1v2beta~1stable-image~1generate~1sd3/post), or [download model](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) to run locally with ComfyUI or diffusers.")
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)

        # infer() returns (image, data-URL string). The string gets its own
        # hidden component so it is still part of the event's outputs without
        # being written into the seed slider, which would feed a non-numeric
        # seed into the next run.
        image_data_url = gr.Text(label="Image data URL", visible=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=7.5,
                    step=0.1,
                    value=4.5,
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=40,
                )

        # Examples supply only the prompt; infer() uses its defaults for the
        # remaining parameters.
        gr.Examples(examples=examples, inputs=[prompt], outputs=[result, image_data_url], fn=infer, cache_examples=True, cache_mode="lazy")

    # Run inference when the button is clicked or the prompt is submitted.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, image_data_url],
    )

# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()