"""Gradio app exposing text-to-image and image-to-image generation with the
CyberRealistic Pony SDXL checkpoint, tuned for Hugging Face Spaces."""

import gc
import os
import time
from typing import Optional, Tuple

import gradio as gr
import spaces
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
from huggingface_hub import hf_hub_download
from PIL import Image

# Global pipeline variables (populated lazily by load_models()).
txt2img_pipe = None
img2img_pipe = None
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face model configuration
MODEL_REPO = "ajsbsd/CyberRealistic-Pony"
MODEL_FILENAME = "cyberrealisticPony_v110.safetensors"

# Conservative limits for Spaces.
MAX_INFERENCE_STEPS = 30
MAX_IMG2IMG_SIDE = 768


def clear_memory():
    """Release cached CUDA memory (when CUDA is available) and run the GC."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()


def _apply_memory_optimizations(pipe):
    """Enable slicing and, on CUDA, model-level CPU offload on ``pipe``.

    ``enable_model_cpu_offload`` and ``enable_sequential_cpu_offload`` are
    alternative offloading strategies; only one is enabled here instead of
    stacking both on the same pipeline.
    """
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    if device == "cuda":
        pipe.enable_model_cpu_offload()
    return pipe


def load_models():
    """Load the text2img pipeline and an img2img pipeline sharing its weights.

    Pipelines that are already loaded are left untouched, so the function is
    safe to call repeatedly.

    Returns:
        True when both pipelines are available, False if loading failed (the
        error is printed).
    """
    global txt2img_pipe, img2img_pipe

    try:
        print("Loading CyberRealistic Pony models...")

        # Load Text2Img pipeline
        if txt2img_pipe is None:
            # Download model file using huggingface_hub
            print(f"Downloading model from {MODEL_REPO}...")
            model_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename=MODEL_FILENAME,
                cache_dir="/tmp/hf_cache",  # Use tmp for Spaces
            )
            print(f"Model downloaded to: {model_path}")

            pipe = StableDiffusionXLPipeline.from_single_file(
                model_path,
                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
                use_safetensors=True,
                variant="fp16" if device == "cuda" else None,
            )
            pipe = _apply_memory_optimizations(pipe)
            if device != "cuda":
                pipe = pipe.to(device)
            txt2img_pipe = pipe

        # Share components for Img2Img to save memory
        if img2img_pipe is None:
            img2img_pipe = _apply_memory_optimizations(
                StableDiffusionXLImg2ImgPipeline(
                    vae=txt2img_pipe.vae,
                    text_encoder=txt2img_pipe.text_encoder,
                    text_encoder_2=txt2img_pipe.text_encoder_2,
                    tokenizer=txt2img_pipe.tokenizer,
                    tokenizer_2=txt2img_pipe.tokenizer_2,
                    unet=txt2img_pipe.unet,
                    scheduler=txt2img_pipe.scheduler,
                )
            )

        print("Models loaded successfully!")
        return True

    except Exception as e:  # top-level boundary: report and let the UI recover
        print(f"Error loading models: {e}")
        return False


def enhance_prompt(prompt: str, add_quality_tags: bool = True) -> str:
    """Prefix ``prompt`` with Pony-style quality tags.

    The prompt is returned unchanged when it is empty/blank, when tagging is
    disabled, or when it already starts with a ``score_`` tag.
    """
    if not prompt or not prompt.strip() or not add_quality_tags:
        return prompt
    # Ignore leading whitespace so " score_9, ..." is not tagged twice.
    if prompt.lstrip().startswith("score_"):
        return prompt

    quality_tags = "score_9, score_8_up, score_7_up, masterpiece, best quality, highly detailed"
    return f"{quality_tags}, {prompt}"


def validate_dimensions(width: int, height: int) -> Tuple[int, int]:
    """Round each dimension up to a multiple of 64 and clamp it to [512, 1024].

    Inputs are coerced to ``int`` so float values still yield integer sizes.
    """

    def _snap(value) -> int:
        rounded = ((int(value) + 63) // 64) * 64
        # More conservative limits for Spaces
        return max(512, min(1024, rounded))

    return _snap(width), _snap(height)


def _make_generator(seed) -> Optional[torch.Generator]:
    """Return a seeded generator, or None for a random seed (``-1``/``None``)."""
    if seed is None or seed == -1:
        return None
    return torch.Generator(device=device).manual_seed(int(seed))


def _prepare_input_image(input_image):
    """Convert a PIL image to RGB and resize it for img2img.

    The image is first shrunk to fit within MAX_IMG2IMG_SIDE, then resized to
    the dimensions returned by validate_dimensions(). Because those are rounded
    and clamped per axis, the final aspect ratio can differ from the input's.
    Non-PIL inputs are returned unchanged.
    """
    if not isinstance(input_image, Image.Image):
        return input_image

    if input_image.mode != 'RGB':
        input_image = input_image.convert('RGB')

    # Conservative resize for Spaces
    input_image.thumbnail((MAX_IMG2IMG_SIDE, MAX_IMG2IMG_SIDE), Image.Resampling.LANCZOS)
    w, h = validate_dimensions(*input_image.size)
    return input_image.resize((w, h), Image.Resampling.LANCZOS)


@spaces.GPU(duration=60)  # GPU decorator for Spaces
def generate_txt2img(prompt, negative_prompt, num_steps, guidance_scale,
                     width, height, seed, add_quality_tags):
    """Generate an image from a text prompt.

    Returns:
        ``(image, status)``; ``image`` is None when validation, model loading
        or generation fails, and ``status`` then carries the reason.
    """
    if not prompt or not prompt.strip():
        return None, "Please enter a prompt"

    # Lazy load models
    if txt2img_pipe is None and not load_models():
        return None, "Failed to load models. Please try again."

    try:
        clear_memory()

        width, height = validate_dimensions(width, height)
        generator = _make_generator(seed)
        enhanced_prompt = enhance_prompt(prompt, add_quality_tags)

        print(f"Generating: {enhanced_prompt[:100]}...")
        start_time = time.time()

        # Generate with lower memory usage
        with torch.no_grad():
            result = txt2img_pipe(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt or "",
                # Limit steps for Spaces
                num_inference_steps=min(int(num_steps), MAX_INFERENCE_STEPS),
                guidance_scale=float(guidance_scale),
                width=width,
                height=height,
                generator=generator,
            )

        generation_time = time.time() - start_time
        status = f"Generated in {generation_time:.1f}s ({width}x{height})"
        return result.images[0], status

    except Exception as e:
        return None, f"Generation failed: {str(e)}"
    finally:
        clear_memory()


@spaces.GPU(duration=60)  # GPU decorator for Spaces
def generate_img2img(input_image, prompt, negative_prompt, num_steps, guidance_scale,
                     strength, seed, add_quality_tags):
    """Generate an image from an input image plus a text prompt.

    Returns:
        ``(image, status)``; ``image`` is None when validation, model loading
        or generation fails, and ``status`` then carries the reason.
    """
    if input_image is None:
        return None, "Please upload an input image"

    if not prompt or not prompt.strip():
        return None, "Please enter a prompt"

    # Lazy load models
    if img2img_pipe is None and not load_models():
        return None, "Failed to load models. Please try again."

    try:
        clear_memory()

        generator = _make_generator(seed)
        enhanced_prompt = enhance_prompt(prompt, add_quality_tags)
        input_image = _prepare_input_image(input_image)

        print(f"Transforming: {enhanced_prompt[:100]}...")
        start_time = time.time()

        with torch.no_grad():
            result = img2img_pipe(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt or "",
                image=input_image,
                # Limit steps
                num_inference_steps=min(int(num_steps), MAX_INFERENCE_STEPS),
                guidance_scale=float(guidance_scale),
                strength=float(strength),
                generator=generator,
            )

        generation_time = time.time() - start_time
        status = f"Transformed in {generation_time:.1f}s (Strength: {strength})"
        return result.images[0], status

    except Exception as e:
        return None, f"Transformation failed: {str(e)}"
    finally:
        clear_memory()


# Simplified negative prompt for better performance
DEFAULT_NEGATIVE = (
    " (low quality:1.3), (worst quality:1.3), (bad quality:1.2), "
    "blurry, noisy, ugly, deformed, "
    "(text, watermark:1.4), (extra limbs:1.3), "
    "(bad hands:1.3), (bad anatomy:1.2) "
)

# Gradio interface optimized for Spaces
with gr.Blocks(
    title="CyberRealistic Pony Generator",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("""
    # 🎨 CyberRealistic Pony Image Generator

    Generate high-quality images using the CyberRealistic Pony SDXL model.

    ⚠️ **Note**: First generation may take longer as the model loads. GPU time is limited on Spaces.
    """)

    with gr.Tabs():
        with gr.TabItem("🎨 Text to Image"):
            with gr.Row():
                with gr.Column():
                    txt2img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="beautiful landscape, mountains, sunset",
                        lines=2
                    )

                    with gr.Accordion("Advanced Settings", open=False):
                        txt2img_negative = gr.Textbox(
                            label="Negative Prompt",
                            value=DEFAULT_NEGATIVE,
                            lines=2
                        )

                        txt2img_quality_tags = gr.Checkbox(
                            label="Add Quality Tags",
                            value=True
                        )

                        with gr.Row():
                            txt2img_steps = gr.Slider(10, 30, 20, step=1, label="Steps")
                            txt2img_guidance = gr.Slider(1.0, 15.0, 7.5, step=0.5, label="Guidance")

                        with gr.Row():
                            txt2img_width = gr.Slider(512, 1024, 768, step=64, label="Width")
                            txt2img_height = gr.Slider(512, 1024, 768, step=64, label="Height")

                        txt2img_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    txt2img_btn = gr.Button("🎨 Generate", variant="primary", size="lg")

                with gr.Column():
                    txt2img_output = gr.Image(label="Generated Image", height=400)
                    txt2img_status = gr.Textbox(label="Status", interactive=False)

        with gr.TabItem("🖼️ Image to Image"):
            with gr.Row():
                with gr.Column():
                    img2img_input = gr.Image(label="Input Image", type="pil", height=250)

                    img2img_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="digital painting style, vibrant colors",
                        lines=2
                    )

                    with gr.Accordion("Advanced Settings", open=False):
                        img2img_negative = gr.Textbox(
                            label="Negative Prompt",
                            value=DEFAULT_NEGATIVE,
                            lines=2
                        )

                        img2img_quality_tags = gr.Checkbox(
                            label="Add Quality Tags",
                            value=True
                        )

                        with gr.Row():
                            img2img_steps = gr.Slider(10, 30, 20, step=1, label="Steps")
                            img2img_guidance = gr.Slider(1.0, 15.0, 7.5, step=0.5, label="Guidance")

                        img2img_strength = gr.Slider(
                            0.1, 1.0, 0.75, step=0.05,
                            label="Strength (Higher = more creative)"
                        )

                        img2img_seed = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    img2img_btn = gr.Button("🖼️ Transform", variant="primary", size="lg")

                with gr.Column():
                    img2img_output = gr.Image(label="Generated Image", height=400)
                    img2img_status = gr.Textbox(label="Status", interactive=False)

    # Event handlers
    txt2img_btn.click(
        fn=generate_txt2img,
        inputs=[txt2img_prompt, txt2img_negative, txt2img_steps, txt2img_guidance,
                txt2img_width, txt2img_height, txt2img_seed, txt2img_quality_tags],
        outputs=[txt2img_output, txt2img_status]
    )

    img2img_btn.click(
        fn=generate_img2img,
        inputs=[img2img_input, img2img_prompt, img2img_negative, img2img_steps, img2img_guidance,
                img2img_strength, img2img_seed, img2img_quality_tags],
        outputs=[img2img_output, img2img_status]
    )

print(f"🚀 CyberRealistic Pony Generator initialized on {device}")

if __name__ == "__main__":
    demo.launch()