#!/usr/bin/env python # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is import spaces import torchruntime torchruntime.init_torch() import os import random import uuid import gradio as gr import numpy as np from PIL import Image import torch #import diffusers from diffusers import AutoencoderKL, StableDiffusionXLPipeline from diffusers import EulerAncestralDiscreteScheduler from typing import Tuple import paramiko import datetime #from diffusers import DPMSolverSDEScheduler from diffusers.models.attention_processor import AttnProcessor2_0 from transformers import CLIPTextModelWithProjection, CLIPTextModel torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False torch.backends.cudnn.allow_tf32 = False torch.backends.cudnn.deterministic = False torch.backends.cudnn.benchmark = False torch.backends.cuda.preferred_blas_library="cublas" torch.backends.cuda.preferred_linalg_library="cusolver" torch.set_float32_matmul_precision("highest") FTP_HOST = "1ink.us" FTP_USER = "ford442" FTP_PASS = os.getenv("FTP_PASS") FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server DESCRIPTIONXX = """ ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester B) ⚡⚡⚡⚡ """ examples = [ "Many apples splashed with drops of water within a fancy bowl 4k, hdr --v 6.0 --style raw", "A profile photo of a dog, brown background, shot on Leica M6 --ar 128:85 --v 6.0 --style raw", ] MODEL_OPTIONS = { "REALVISXL V5.0 BF16": "ford442/RealVisXL_V5.0_BF16", } MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096")) BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1")) style_list = [ { "name": "3840 x 2160", "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "2560 x 1440", "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "HD+", "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "Style Zero", "prompt": "{prompt}", "negative_prompt": "", }, ] styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list} DEFAULT_STYLE_NAME = "Style Zero" STYLE_NAMES = list(styles.keys()) HF_TOKEN = os.getenv("HF_TOKEN") os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") def load_and_prepare_model(): #vaeRV = AutoencoderKL.from_pretrained("SG161222/RealVisXL_V5.0", subfolder='vae', safety_checker=None, use_safetensors=True, token=True) #vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, low_cpu_mem_usage=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16) vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", low_cpu_mem_usage=False, safety_checker=None, use_safetensors=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16) #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True) #sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler') #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", token=True) #, beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True, token=True) #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear") pipe = StableDiffusionXLPipeline.from_pretrained( 'ford442/RealVisXL_V5.0_BF16', #torch_dtype=torch.bfloat16, token=True, add_watermarker=False, #text_encoder=None, #text_encoder_2=None, vae=None, ) #pipe.vae = vaeXL #.to(torch.bfloat16) #pipe.scheduler = sched #pipe.vae.do_resize=False #pipe.vae.vae_scale_factor=8 #pipe.to(device) #pipe.to(torch.bfloat16) print(f'init noise scale: {pipe.scheduler.init_noise_sigma}') pipe.watermark=None pipe.safety_checker=None #pipe.unet.to(memory_format=torch.channels_last) #pipe.enable_vae_tiling() pipe.to(device=device, dtype=torch.bfloat16) pipe.vae = vaeXL.to(device) #.to('cpu') #.to(torch.bfloat16) pipe.unet.set_attn_processor(AttnProcessor2_0()) pipe.vae.set_default_attn_processor() return pipe pipe = load_and_prepare_model() text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True)#.to(device=device, dtype=torch.bfloat16) text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True)#.to(device=device, dtype=torch.bfloat16) MAX_SEED = np.iinfo(np.int32).max neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' " def upload_to_ftp(filename): try: transport = paramiko.Transport((FTP_HOST, 22)) if filename.endswith(".txt"): destination_path=FTP_DIR+'/txt/'+filename else: destination_path=FTP_DIR+filename transport.connect(username = FTP_USER, password = FTP_PASS) sftp = paramiko.SFTPClient.from_transport(transport) sftp.put(filename, destination_path) sftp.close() transport.close() print(f"Uploaded {filename} to FTP server") except Exception as e: print(f"FTP upload error: {e}") def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]: if style_name in styles: p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME]) else: p, n = styles[DEFAULT_STYLE_NAME] if not negative: negative = "" return p.replace("{prompt}", positive), n + negative def save_image(img): unique_name = str(uuid.uuid4()) + ".png" img.save(unique_name,optimize=False,compress_level=0) return unique_name def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp): filename= f'tst_B_{timestamp}.txt' with open(filename, "w") as f: f.write(f"Realvis 5.0 (Tester B) \n") f.write(f"Date/time: {timestamp} \n") f.write(f"Prompt: {prompt} \n") f.write(f"Steps: {num_inference_steps} \n") f.write(f"Guidance Scale: {guidance_scale} \n") f.write(f"SPACE SETUP: \n") f.write(f"Use Model Dtype: no \n") f.write(f"Model Scheduler: Euler_a all_custom before cuda \n") f.write(f"To cuda and bfloat \n") upload_to_ftp(filename) @spaces.GPU(duration=30) def generate_30( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cuda').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", } if use_resolution_binning: options["use_resolution_binning"] = True images = [] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) batch_options = options.copy() rv_image = pipe(**batch_options).images[0] sd_image_path = f"rv50_B_{timestamp}.png" rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] @spaces.GPU(duration=60) def generate_60( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cuda').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", } if use_resolution_binning: options["use_resolution_binning"] = True images = [] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) batch_options = options.copy() rv_image = pipe(**batch_options).images[0] sd_image_path = f"rv50_B_{timestamp}.png" rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] @spaces.GPU(duration=90) def generate_90( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cuda').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", } if use_resolution_binning: options["use_resolution_binning"] = True images = [] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) batch_options = options.copy() rv_image = pipe(**batch_options).images[0] sd_image_path = f"rv50_B_{timestamp}.png" rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] def load_predefined_images1(): predefined_images1 = [ "assets/7.png", "assets/8.png", "assets/9.png", "assets/1.png", "assets/2.png", "assets/3.png", "assets/4.png", "assets/5.png", "assets/6.png", ] return predefined_images1 css = ''' #col-container { margin: 0 auto; max-width: 640px; } h1{text-align:center} footer { visibility: hidden } body { background-color: green; } ''' with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo: gr.Markdown(DESCRIPTIONXX) with gr.Row(): prompt = gr.Text( label="Prompt", show_label=False, max_lines=1, placeholder="Enter your prompt", container=False, ) run_button_30 = gr.Button("Run 30 Seconds", scale=0) run_button_60 = gr.Button("Run 60 Seconds", scale=0) run_button_90 = gr.Button("Run 90 Seconds", scale=0) result = gr.Gallery(label="Result", columns=1, show_label=False) with gr.Row(): style_selection = gr.Radio( show_label=True, container=True, interactive=True, choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, label="Quality Style", ) with gr.Row(): with gr.Column(scale=1): use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True) negative_prompt = gr.Text( label="Negative prompt", max_lines=5, lines=4, placeholder="Enter a negative prompt", value="('deformed', 'distorted', 'disfigured':1.3),'not photorealistic':1.5, 'poorly drawn', 'bad anatomy', 'wrong anatomy', 'extra limb', 'missing limb', 'floating limbs', 'poorly drawn hands', 'poorly drawn feet', 'poorly drawn face':1.3, 'out of frame', 'extra limbs', 'bad anatomy', 'bad art', 'beginner', 'distorted face','amateur'", visible=True, ) with gr.Row(): width = gr.Slider( label="Width", minimum=448, maximum=MAX_IMAGE_SIZE, step=64, value=768, ) height = gr.Slider( label="Height", minimum=448, maximum=MAX_IMAGE_SIZE, step=64, value=768, ) with gr.Row(): guidance_scale = gr.Slider( label="Guidance Scale", minimum=0.1, maximum=30, step=0.1, value=3.8, ) num_inference_steps = gr.Slider( label="Number of inference steps", minimum=10, maximum=1000, step=10, value=170, ) gr.Examples( examples=examples, inputs=prompt, cache_examples=False ) use_negative_prompt.change( fn=lambda x: gr.update(visible=x), inputs=use_negative_prompt, outputs=negative_prompt, api_name=False, ) gr.on( triggers=[ run_button_30.click, ], # api_name="generate", # Add this line fn=generate_30, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.on( triggers=[ run_button_60.click, ], # api_name="generate", # Add this line fn=generate_60, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.on( triggers=[ run_button_90.click, ], # api_name="generate", # Add this line fn=generate_90, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.Markdown("### REALVISXL V5.0") predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1()) #gr.Markdown("### LIGHTNING V5.0") #predefined_gallery = gr.Gallery(label="LIGHTNING V5.0", columns=3, show_label=False, value=load_predefined_images()) gr.Markdown( """