import os
import random
import uuid
import json
import gradio as gr
import numpy as np
from PIL import Image
import spaces
import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from typing import Tuple

DESCRIPTIONx = """## STABLE HAMSTER
"""

# Use environment variables for flexibility
MODEL_ID = os.getenv("MODEL_REPO")  # must point to an SDXL-compatible checkpoint repo; no default is provided
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # Allow generating multiple images per pipeline call

# Determine the device and load the model once at import time for efficiency
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    use_safetensors=True,
    add_watermarker=False,
).to(device)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# torch.compile for a potential speedup (experimental). The pipeline object has no
# compile() method, so compile the UNet, which dominates inference time.
if USE_TORCH_COMPILE:
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

# CPU offloading to reduce VRAM usage at the cost of speed (experimental)
if ENABLE_CPU_OFFLOAD:
    pipe.enable_model_cpu_offload()

MAX_SEED = np.iinfo(np.int32).max
style_list = [
    {
        "name": "3840 x 2160",
        "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
        "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
    },
    {
        "name": "2560 x 1440",
        "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
        "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt}. octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
]

styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "3840 x 2160"


def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
    p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    if not negative:
        negative = ""
    return p.replace("{prompt}", positive), n + negative


def save_image(img):
    unique_name = str(uuid.uuid4()) + ".png"
    img.save(unique_name)
    return unique_name


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed


@spaces.GPU(duration=35, enable_queue=True)
def generate(
    prompt: str,
    negative_prompt: str = "",
    use_negative_prompt: bool = False,
    seed: int = 1,
    width: int = 1024,
    height: int = 1024,
    guidance_scale: float = 3,
    num_inference_steps: int = 30,
    randomize_seed: bool = False,
    num_images: int = 1,  # Number of images to generate
    style: str = DEFAULT_STYLE_NAME,
    use_resolution_binning: bool = True,  # not exposed in the UI, so it stays after the wired inputs
    progress=gr.Progress(track_tqdm=True),
):
    # Parameter order (up to `style`) must match the `inputs` list passed to gr.on below.
    prompt, negative_prompt = apply_style(style, prompt, negative_prompt)
    seed = int(randomize_seed_fn(seed, randomize_seed))
    generator = torch.Generator(device=device).manual_seed(seed)

    # Pipeline call options shared by all batches
    options = {
        "prompt": [prompt] * int(num_images),
        "negative_prompt": [negative_prompt] * int(num_images) if use_negative_prompt else None,
        "width": int(width),
        "height": int(height),
        "guidance_scale": float(guidance_scale),
        "num_inference_steps": int(num_inference_steps),
        "generator": generator,
        "output_type": "pil",
    }

    # Use resolution binning for faster generation with less VRAM usage
    if use_resolution_binning:
        options["use_resolution_binning"] = True

    # Generate images in batches of BATCH_SIZE
    images = []
    for i in range(0, int(num_images), BATCH_SIZE):
        batch_options = options.copy()
        batch_options["prompt"] = options["prompt"][i:i + BATCH_SIZE]
        # Only slice the negative prompts when they are in use (the value is None otherwise)
        if batch_options.get("negative_prompt") is not None:
            batch_options["negative_prompt"] = options["negative_prompt"][i:i + BATCH_SIZE]
        images.extend(pipe(**batch_options).images)

    image_paths = [save_image(img) for img in images]
    return image_paths, seed


examples = [
    "a cat eating a piece of cheese",
    "a ROBOT riding a BLUE horse on Mars, photorealistic, 4k",
    "Ironman VS Hulk, ultrarealistic",
    "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
    "An alien holding a sign board containing the word 'Flash', futuristic, neonpunk",
    "Kids going to school, Anime style",
]

css = '''
.gradio-container{max-width: 560px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
    gr.Markdown(DESCRIPTIONx)
    with gr.Group():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)
        result = gr.Gallery(label="Result", columns=1, show_label=False)
    with gr.Accordion("Advanced options", open=False):
        num_images = gr.Slider(
            label="Number of Images",
            minimum=1,
            maximum=4,
            step=1,
            value=1,
        )
        with gr.Row():
            use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=5,
                lines=4,
                placeholder="Enter a negative prompt",
                value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, NSFW",
                visible=True,
            )
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=MAX_SEED,
            step=1,
            value=0,
        )
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        with gr.Row(visible=True):
            width = gr.Slider(
                label="Width",
                minimum=512,
                maximum=MAX_IMAGE_SIZE,
                step=64,
                value=1024,
            )
            height = gr.Slider(
                label="Height",
                minimum=512,
                maximum=MAX_IMAGE_SIZE,
                step=64,
                value=1024,
            )
        with gr.Row():
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=0.1,
                maximum=6,
                step=0.1,
                value=3.0,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=15,
                step=1,
                value=8,
            )
        style_selection = gr.Radio(
            show_label=True,
            container=True,
            interactive=True,
            choices=STYLE_NAMES,
            value=DEFAULT_STYLE_NAME,
            label="Image Style",
        )

    gr.Examples(
        examples=examples,
        inputs=prompt,
        cache_examples=False,
    )

    use_negative_prompt.change(
        fn=lambda x: gr.update(visible=x),
        inputs=use_negative_prompt,
        outputs=negative_prompt,
        api_name=False,
    )

    gr.on(
        triggers=[
            prompt.submit,
            negative_prompt.submit,
            run_button.click,
        ],
        fn=generate,
        inputs=[
            prompt,
            negative_prompt,
            use_negative_prompt,
            seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            randomize_seed,
            num_images,
            style_selection,
        ],
        outputs=[result, seed],
        api_name="run",
    )

if __name__ == "__main__":
    demo.queue(max_size=50).launch()
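
# --- Example launch (illustrative sketch, not part of the app) ----------------
# All configuration is read from the environment variables defined above, and
# MODEL_REPO has no default, so it must point to an SDXL-compatible checkpoint.
# The repo id and the "app.py" filename below are assumptions chosen purely for
# illustration:
#
#   MODEL_REPO=stabilityai/stable-diffusion-xl-base-1.0 \
#   MAX_IMAGE_SIZE=2048 BATCH_SIZE=2 USE_TORCH_COMPILE=0 ENABLE_CPU_OFFLOAD=0 \
#   python app.py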