import os
import json
import copy
import math
import time
import random
import logging
import numpy as np
from typing import Any, Dict, List, Optional, Union

import torch
from PIL import Image
import gradio as gr
import spaces
from diffusers import (
    DiffusionPipeline,
    FlowMatchEulerDiscreteScheduler,
    AutoencoderTiny,
    AutoencoderKL,
    AutoPipelineForImage2Image,
)
from huggingface_hub import (
    hf_hub_download,
    HfFileSystem,
    ModelCard,
    snapshot_download,
)
from diffusers.utils import load_image
import requests
from urllib.parse import urlparse
import tempfile
import shutil
import uuid
import zipfile

# META: CUDA_CHECK / GPU_INFO
device = "cuda" if torch.cuda.is_available() else "cpu"
print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("torch.__version__ =", torch.__version__)
print("torch.version.cuda =", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
print("cuda device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("current device:", torch.cuda.current_device())
    print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
print("Using device:", device)

loras = [
    # Sample Qwen-compatible LoRAs
    {
        "image": "https://huggingface.co/damnthatai/Game_Boy_Camera_Pixel_Style_Qwen/resolve/main/images/20250818090201_Qwen8s_00001_.jpg",
        "title": "Camera Pixel Style",
        "repo": "damnthatai/Game_Boy_Camera_Pixel_Style_Qwen",
        "weights": "g4m3b0yc4m3r4_qwen.safetensors",
        "trigger_word": "g4m3b0yc4m3r4, grayscale, pixel photo",
    },
    {
        "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Studio-Realism/resolve/main/images/2.png",
        "title": "Studio Realism",
        "repo": "prithivMLmods/Qwen-Image-Studio-Realism",
        "weights": "qwen-studio-realism.safetensors",
        "trigger_word": "Studio Realism",
    },
    {
        "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Sketch-Smudge/resolve/main/images/1.png",
        "title": "Sketch Smudge",
        "repo": "prithivMLmods/Qwen-Image-Sketch-Smudge",
        "weights": "qwen-sketch-smudge.safetensors",
        "trigger_word": "Sketch Smudge",
    },
    {
        "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Anime-LoRA/resolve/main/images/1.png",
        "title": "Qwen Anime",
        "repo": "prithivMLmods/Qwen-Image-Anime-LoRA",
        "weights": "qwen-anime.safetensors",
        "trigger_word": "Qwen Anime",
    },
    {
        "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Fragmented-Portraiture/resolve/main/images/3.png",
        "title": "Fragmented Portraiture",
        "repo": "prithivMLmods/Qwen-Image-Fragmented-Portraiture",
        "weights": "qwen-fragmented-portraiture.safetensors",
        "trigger_word": "Fragmented Portraiture",
    },
    {
        "image": "https://huggingface.co/prithivMLmods/Qwen-Image-Synthetic-Face/resolve/main/images/2.png",
        "title": "Synthetic Face",
        "repo": "prithivMLmods/Qwen-Image-Synthetic-Face",
        "weights": "qwen-synthetic-face.safetensors",
        "trigger_word": "Synthetic Face",
    },
    {
        "image": "https://huggingface.co/Tomechi02/Macne_style_enahncer/resolve/main/images/pixai-1913880604374308947-2.png",
        "title": "Macne Style Enhancer",
        "repo": "Tomechi02/Macne_style_enahncer",
        "weights": "Macne_Style_enhancer.safetensors",
        "trigger_word": "macloid, gomoku",
    },
    {
        "image": "https://huggingface.co/itspoidaman/qwenglitch/resolve/main/images/GyZTwJIbkAAhS4h.jpeg",
        "title": "Qwen Glitch",
        "repo": "itspoidaman/qwenglitch",
        "weights": "qwenglitch1.safetensors",
        "trigger_word": "qwenglitch",
    },
    {
        "image": "https://huggingface.co/alfredplpl/qwen-image-modern-anime-lora/resolve/main/sample1.jpg",
        "title": "Modern Anime LoRA",
        "repo": "alfredplpl/qwen-image-modern-anime-lora",
        "weights": "lora.safetensors",
        "trigger_word": "Japanese modern anime style",
    },
    {
        "image": "https://huggingface.co/damnthatai/Apple_QuickTake_150_Digital_Camera_Qwen/resolve/main/images/20250817084713_Qwen.jpg",
        "title": "Apple QuickTake 150 Digital Camera",
        "repo": "damnthatai/Apple_QuickTake_150_Digital_Camera_Qwen",
        "weights": "quicktake150style_qwen.safetensors",
        "trigger_word": "quicktake150style",
    },
]

# Initialize the base model
dtype = torch.bfloat16
base_model = "Qwen/Qwen-Image"

# Scheduler configuration from the Qwen-Image-Lightning repository
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

pipe = DiffusionPipeline.from_pretrained(
    base_model, scheduler=scheduler, torch_dtype=dtype
).to(device)

# Tiny autoencoder for fast intermediate previews; full-quality VAE kept for the final decode
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
pipe.vae = taef1

# Image-to-image pipeline that reuses the text-to-image components already in memory
pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
    base_model,
    vae=good_vae,
    transformer=pipe.transformer,
    text_encoder=pipe.text_encoder,
    tokenizer=pipe.tokenizer,
    text_encoder_2=pipe.text_encoder_2,
    tokenizer_2=pipe.tokenizer_2,
    scheduler=scheduler,
    torch_dtype=dtype,
).to(device)

# Lightning LoRA info (no global state)
LIGHTNING_LORA_REPO = "lightx2v/Qwen-Image-Lightning"
LIGHTNING_LORA_WEIGHT = "Qwen-Image-Lightning-8steps-V1.0.safetensors"

MAX_SEED = 2**32 - 1


class calculateDuration:
    """Context manager that prints how long the wrapped block took."""

    def __init__(self, activity_name=""):
        self.activity_name = activity_name

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end_time = time.time()
        self.elapsed_time = self.end_time - self.start_time
        if self.activity_name:
            print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
        else:
            print(f"Elapsed time: {self.elapsed_time:.6f} seconds")


def compute_image_dimensions(aspect_ratio):
    """Converts aspect ratio string to width, height tuple."""
    if aspect_ratio == "1:1":
        return 1024, 1024
    elif aspect_ratio == "16:9":
        return 1152, 640
    elif aspect_ratio == "9:16":
        return 640, 1152
    elif aspect_ratio == "4:3":
        return 1024, 768
    elif aspect_ratio == "3:4":
        return 768, 1024
    elif aspect_ratio == "3:2":
        return 1024, 688
    elif aspect_ratio == "2:3":
        return 688, 1024
    else:
        return 1024, 1024


def handle_lora_selection(evt: gr.SelectData, aspect_ratio):
    selected_lora = loras[evt.index]
    new_placeholder = f"Type a prompt for {selected_lora['title']}"
    lora_repo = selected_lora["repo"]
    updated_text = f"### Selected: [{lora_repo}](https://huggingface.co/{lora_repo}) ✅"
    # Update aspect ratio if specified in LoRA config
    if "aspect" in selected_lora:
        if selected_lora["aspect"] == "portrait":
            aspect_ratio = "9:16"
        elif selected_lora["aspect"] == "landscape":
            aspect_ratio = "16:9"
        else:
            aspect_ratio = "1:1"
    return (
        gr.update(placeholder=new_placeholder),
        updated_text,
        evt.index,
        aspect_ratio,
    )


def adjust_generation_mode(speed_mode):
    """Update UI based on speed/quality toggle."""
    if speed_mode == "Fast (8 steps)":
        return gr.update(value="Fast mode selected - 8 steps with Lightning LoRA"), 8, 1.0
    else:
        return gr.update(value="Base mode selected - 48 steps for best quality"), 48, 4.0
@spaces.GPU(duration=100)
def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt=""):
    generator = torch.Generator(device="cuda").manual_seed(seed)
    pipe.to("cuda")
    batch_size = 1
    prompt = prompt_mash
    do_classifier_free_guidance = cfg_scale > 1.0

    prompt_embeds, pooled_prompt_embeds = pipe.encode_prompt(
        prompt,
        num_images_per_prompt=1,
        do_classifier_free_guidance=do_classifier_free_guidance,
        prompt_2=None,
        max_sequence_length=256,
    )

    # Snap the requested size down to multiples of 16
    height, width = height - height % 16, width - width % 16

    latents = pipe.prepare_latents(
        batch_size,
        pipe.transformer.config.in_channels,
        height,
        width,
        dtype,
        device,
        generator,
        latents=None,
    )

    pipe.scheduler.set_timesteps(steps)
    timesteps = pipe.scheduler.timesteps
    joint_attention_kwargs = {"scale": lora_scale}

    for i in range(steps):
        # Step with the scheduler's discrete timestep for this iteration
        t = timesteps[i]
        latent_model_input = latents
        with torch.no_grad():
            noise_pred = pipe.transformer(
                hidden_states=latent_model_input,
                timestep=t,
                guidance=cfg_scale,
                pooled_projections=pooled_prompt_embeds,
                encoder_hidden_states=prompt_embeds,
                joint_attention_kwargs=joint_attention_kwargs,
                return_dict=False,
            )[0]
        latents = pipe.scheduler.step(
            model_output=noise_pred,
            timestep=t,
            sample=latent_model_input,
            return_dict=False,
        )[0]
        # Preview: decode with the tiny VAE for a fast intermediate image
        with torch.no_grad():
            decoded = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
            image = pipe.image_processor.postprocess(decoded, output_type="pil")[0]
        yield image

    # Final: decode with the full-quality VAE
    with torch.no_grad():
        decoded = good_vae.decode(latents / good_vae.config.scaling_factor, return_dict=False)[0]
        image = pipe.image_processor.postprocess(decoded, output_type="pil")[0]
    yield image


@spaces.GPU(duration=100)
def generate_image_to_image(prompt_mash, image_input_path, image_strength, steps, cfg_scale, width, height, lora_scale, seed):
    generator = torch.Generator(device="cuda").manual_seed(seed)
    pipe_i2i.to("cuda")
    image_input = load_image(image_input_path)
    final_image = pipe_i2i(
        prompt=prompt_mash,
        image=image_input,
        strength=image_strength,
        num_inference_steps=steps,
        guidance_scale=cfg_scale,
        width=width,
        height=height,
        generator=generator,
        joint_attention_kwargs={"scale": lora_scale},
        output_type="pil",
    ).images[0]
    return final_image


@spaces.GPU(duration=100)
def process_adapter_generation(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode, image_input, image_strength, negative_prompt="", progress=gr.Progress(track_tqdm=True)):
    if selected_index is None:
        raise gr.Error("You must select a LoRA before proceeding.")

    selected_lora = loras[selected_index]
    lora_path = selected_lora["repo"]
    trigger_word = selected_lora["trigger_word"]

    # Prepare prompt with trigger word
    if trigger_word:
        if "trigger_position" in selected_lora:
            if selected_lora["trigger_position"] == "prepend":
                prompt_mash = f"{trigger_word} {prompt}"
            else:
                prompt_mash = f"{prompt} {trigger_word}"
        else:
            prompt_mash = f"{trigger_word} {prompt}"
    else:
        prompt_mash = prompt

    # Always unload any existing LoRAs first to avoid conflicts
    with calculateDuration("Unloading existing LoRAs"):
        pipe.unload_lora_weights()
        pipe_i2i.unload_lora_weights()

    pipe_to_use = pipe_i2i if image_input is not None else pipe

    if speed_mode == "Fast (8 steps)":
        with calculateDuration("Loading Lightning LoRA and style LoRA"):
            # Load Lightning LoRA first
            pipe_to_use.load_lora_weights(
                LIGHTNING_LORA_REPO,
                weight_name=LIGHTNING_LORA_WEIGHT,
                adapter_name="lightning",
            )
            # Load the selected style LoRA
            weight_name = selected_lora.get("weights", None)
            pipe_to_use.load_lora_weights(
                lora_path,
                weight_name=weight_name,
                low_cpu_mem_usage=True,
                adapter_name="style",
            )
            # Set both adapters active with their weights
            pipe_to_use.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
    else:
        # Quality mode - only load the style LoRA
        with calculateDuration(f"Loading LoRA weights for {selected_lora['title']}"):
            weight_name = selected_lora.get("weights", None)
            pipe_to_use.load_lora_weights(
                lora_path,
                weight_name=weight_name,
                low_cpu_mem_usage=True,
            )

    # Randomize the seed when requested
    with calculateDuration("Randomizing seed"):
        if randomize_seed:
            seed = random.randint(0, MAX_SEED)

    # Get image dimensions from aspect ratio
    width, height = compute_image_dimensions(aspect_ratio)

    if image_input is not None:
        final_image = generate_image_to_image(prompt_mash, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed)
        yield final_image, seed, gr.update(visible=False)
    else:
        image_generator = generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt)
        step_counter = 0
        final_image = None
        for image in image_generator:
            step_counter += 1
            final_image = image
            # The original progress-bar markup is cut off in this copy of the file;
            # the simple HTML below is an assumed stand-in that reports step / total.
            progress_bar = f'<div class="progress-bar">Step {step_counter} / {steps}</div>'
            yield image, seed, gr.update(value=progress_bar, visible=True)
        yield final_image, seed, gr.update(visible=False)
# The original file is truncated at this point. The only fragment that survives from
# the missing remainder is part of a UI string reporting the active trigger word, roughly:
#     ... + trigger_word + " as the trigger word" if trigger_word
#     else "No trigger word found. If there's a trigger word, include it in your prompt"
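# Illustrative UI wiring only: the original Gradio interface is missing from this copy
# of the file. The sketch below shows one way the surviving callbacks could be connected;
# every component name, label, and default value here is an assumption, not the original layout.
with gr.Blocks() as demo:
    selected_index = gr.State(None)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", placeholder="Select a LoRA, then type a prompt")
            selected_info = gr.Markdown("")
            gallery = gr.Gallery(
                value=[(item["image"], item["title"]) for item in loras],
                label="LoRA Gallery",
                columns=3,
            )
        with gr.Column():
            result = gr.Image(label="Generated Image")
            progress_html = gr.HTML(visible=False)
            speed_mode = gr.Radio(["Fast (8 steps)", "Base (48 steps)"], value="Fast (8 steps)", label="Mode")
            mode_info = gr.Markdown("Fast mode selected - 8 steps with Lightning LoRA")
            steps = gr.Slider(1, 50, value=8, step=1, label="Steps")
            cfg_scale = gr.Slider(0.0, 10.0, value=1.0, label="Guidance scale")
            lora_scale = gr.Slider(0.0, 2.0, value=1.0, label="LoRA scale")
            aspect_ratio = gr.Dropdown(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"], value="1:1", label="Aspect ratio")
            seed = gr.Number(label="Seed", value=0, precision=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            image_input = gr.Image(type="filepath", label="Input image (optional, enables img2img)")
            image_strength = gr.Slider(0.0, 1.0, value=0.75, label="Image strength")
            generate_btn = gr.Button("Generate")

    # Gallery selection fills the prompt placeholder, selection info, index, and aspect ratio
    gallery.select(
        handle_lora_selection,
        inputs=[aspect_ratio],
        outputs=[prompt, selected_info, selected_index, aspect_ratio],
    )
    # Speed/quality toggle adjusts the step count and guidance scale
    speed_mode.change(adjust_generation_mode, inputs=[speed_mode], outputs=[mode_info, steps, cfg_scale])
    # Generate button streams (image, seed, progress HTML) tuples from the generator
    generate_btn.click(
        process_adapter_generation,
        inputs=[prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio,
                lora_scale, speed_mode, image_input, image_strength],
        outputs=[result, seed, progress_html],
    )

if __name__ == "__main__":
    demo.launch()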