ford442 committed
Commit 4b7af22 · verified · 1 Parent(s): 2b64a43

Update app.py

Files changed (1)
  1. app.py +356 -147
app.py CHANGED
@@ -5,38 +5,119 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 import spaces
 
-import os
 import random
 import uuid
 import gradio as gr
 import numpy as np
 from PIL import Image
-import torch
-#import diffusers
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, UNet2DConditionModel
 from diffusers import EulerAncestralDiscreteScheduler
 from typing import Tuple
 import paramiko
 import datetime
-#from diffusers import DPMSolverSDEScheduler
-from diffusers.models.attention_processor import AttnProcessor2_0
-from transformers import CLIPTextModelWithProjection, CLIPTextModel
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
 torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = False
-torch.backends.cuda.preferred_blas_library="cublas"
-torch.backends.cuda.preferred_linalg_library="cusolver"
 torch.set_float32_matmul_precision("highest")
 
-FTP_HOST = "1ink.us"
-FTP_USER = "ford442"
-FTP_PASS = os.getenv("FTP_PASS")
-FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server
 
 DESCRIPTIONXX = """
 ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester B) ⚡⚡⚡⚡
@@ -81,77 +162,137 @@ styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 DEFAULT_STYLE_NAME = "Style Zero"
 STYLE_NAMES = list(styles.keys())
 HF_TOKEN = os.getenv("HF_TOKEN")
-os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
 def load_and_prepare_model():
-    #vaeRV = AutoencoderKL.from_pretrained("SG161222/RealVisXL_V5.0", subfolder='vae', safety_checker=None, use_safetensors=True, token=True)
-    #vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, low_cpu_mem_usage=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16)
-    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", low_cpu_mem_usage=False, safety_checker=None, use_safetensors=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16)
-    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler', beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1, use_karras_sigmas=True)
-    #sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
-    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler', beta_schedule="scaled_linear", token=True)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
         #torch_dtype=torch.bfloat16,
-        token=True,
         add_watermarker=False,
-        #text_encoder=None,
-        #text_encoder_2=None,
-        unet=None,
-        vae=None,
     )
-    pipe.unet = UNet2DConditionModel.from_pretrained(
-        'ford442/RealVisXL_V5.0_BF16',
-        low_cpu_mem_usage=False,
-        subfolder='unet',
-        upcast_attention=True,
-        #attention_type='gated-text-image',
-        token=True)
-    #pipe.vae = vaeXL #.to(torch.bfloat16)
-    #pipe.scheduler = sched
-    #pipe.vae.do_resize=False
-    #pipe.vae.vae_scale_factor=8
     #pipe.to(device)
     #pipe.to(torch.bfloat16)
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
-    pipe.safety_checker=None
     #pipe.unet.to(memory_format=torch.channels_last)
     #pipe.enable_vae_tiling()
-    pipe.to(device, torch.bfloat16)
-    pipe.vae = vaeXL.to(device) #.to('cpu') #.to(torch.bfloat16)
-
-    pipe.unet.set_attn_processor(AttnProcessor2_0())
-    pipe.vae.set_default_attn_processor()
-
     return pipe
-
 pipe = load_and_prepare_model()
 
-text_encoder = CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder', token=True) #.to(device=device, dtype=torch.bfloat16)
-text_encoder_2 = CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2', token=True) #.to(device=device, dtype=torch.bfloat16)
-
-MAX_SEED = np.iinfo(np.int32).max
 
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
 
-from huggingface_hub import InferenceClient
-client = InferenceClient(
-    model="stable-diffusion-v1-5/stable-diffusion-v1-5",
-    #provider="hf-inference",
-    token=HF_TOKEN
-)
-
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
-        if filename.endswith(".txt"):
-            destination_path = FTP_DIR + '/txt/' + filename
-        else:
-            destination_path = FTP_DIR + filename
         transport.connect(username = FTP_USER, password = FTP_PASS)
         sftp = paramiko.SFTPClient.from_transport(transport)
         sftp.put(filename, destination_path)
@@ -161,65 +302,97 @@ def upload_to_ftp(filename):
     except Exception as e:
         print(f"FTP upload error: {e}")
 
-def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
-    if style_name in styles:
-        p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-    else:
-        p, n = styles[DEFAULT_STYLE_NAME]
-    if not negative:
-        negative = ""
-    return p.replace("{prompt}", positive), n + negative
-
-def save_image(img):
-    unique_name = str(uuid.uuid4()) + ".png"
-    img.save(unique_name,optimize=False,compress_level=0)
-    return unique_name
-
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
-    filename = f'tst_B_{timestamp}.txt'
     with open(filename, "w") as f:
-        f.write(f"Realvis 5.0 (Tester B) \n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
-        f.write(f"Use Model Dtype: no \n")
         f.write(f"Model Scheduler: Euler_a all_custom before cuda \n")
         f.write(f"To cuda and bfloat \n")
     upload_to_ftp(filename)
 
-def generate_api(
-    prompt: str,
-    negative_prompt: str = "",
-    use_negative_prompt: bool = False,
-    style_selection: str = "",
-    width: int = 768,
-    height: int = 768,
-    guidance_scale: float = 4,
-    num_inference_steps: int = 125,
-    use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
-):
-    seed = random.randint(0, MAX_SEED)
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
-    rv_image = client.text_to_image(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        height = height,
-        width = width,
-        num_inference_steps = num_inference_steps,
-        guidance_scale = guidance_scale,
-        seed = seed
-    )
-    sd_image_path = f"sd15_B_{timestamp}.png"
-    rv_image.save(sd_image_path,optimize=False,compress_level=0)
-    upload_to_ftp(sd_image_path)
-    unique_name = str(uuid.uuid4()) + ".png"
-    os.symlink(sd_image_path, unique_name)
-    return [unique_name]
-
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str,
@@ -230,13 +403,16 @@ def generate_30(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -247,17 +423,26 @@ def generate_30(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
     images = []
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
-    upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
     os.symlink(sd_image_path, unique_name)
     return [unique_name]
@@ -272,13 +457,16 @@ def generate_60(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -289,6 +477,7 @@ def generate_60(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
@@ -296,8 +485,11 @@ def generate_60(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
@@ -314,13 +506,16 @@ def generate_90(
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
     use_resolution_binning: bool = True,
-    progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    pipe.text_encoder = text_encoder.to(device=device, dtype=torch.bfloat16)
-    pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
@@ -331,6 +526,7 @@ def generate_90(
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
@@ -338,8 +534,11 @@ def generate_90(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
-    sd_image_path = f"rv50_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
@@ -387,7 +586,6 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     run_button_30 = gr.Button("Run 30 Seconds", scale=0)
     run_button_60 = gr.Button("Run 60 Seconds", scale=0)
     run_button_90 = gr.Button("Run 90 Seconds", scale=0)
-    run_button_api = gr.Button("Run API", scale=0)
     result = gr.Gallery(label="Result", columns=1, show_label=False)
 
     with gr.Row():
@@ -414,14 +612,14 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Row():
         width = gr.Slider(
             label="Width",
-            minimum=256,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
         )
         height = gr.Slider(
             label="Height",
-            minimum=256,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
@@ -436,11 +634,20 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     )
     num_inference_steps = gr.Slider(
         label="Number of inference steps",
-        minimum=1,
         maximum=1000,
-        step=1,
         value=180,
     )
 
     gr.Examples(
         examples=examples,
@@ -470,6 +677,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
@@ -489,6 +697,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
@@ -508,29 +717,11 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result],
     )
-
-    gr.on(
-        triggers=[
-            run_button_api.click,
-        ],
-        # api_name="generate", # Add this line
-        fn=generate_api,
-        inputs=[
-            prompt,
-            negative_prompt,
-            use_negative_prompt,
-            style_selection,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result],
-    )
-
     gr.Markdown("### REALVISXL V5.0")
     predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1())
 
@@ -567,6 +758,24 @@ title = "Text Generator Demo GPT-Neo"
 description = "Text Generator Application by ecarbo"
 
 if __name__ == "__main__":
     demo_interface = demo.queue(max_size=50) # Remove .launch() here
 
     text_gen_interface = gr.Interface(
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 import spaces
+import os
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
 
+import subprocess
+import re
+
+def find_cuda_directories(search_paths=None):
+    """Finds directories that contain "cuda" and a version number in their name.
+
+    Args:
+        search_paths: A list of directories to search. If None, uses common paths.
+
+    Returns:
+        A dictionary where keys are directory paths and values are extracted versions.
+        Returns an empty dictionary if no CUDA directories are found.
+    """
+    if search_paths is None:
+        # Common CUDA installation locations (customize as needed)
+        search_paths = [
+            "/usr/local",             # Linux
+            "/usr/lib",               # Linux
+            "/opt",                   # Linux
+            "/Program Files",         # Windows
+            "/Applications",          # macOS (less common)
+            os.path.expanduser("~"),  # check the user's home directory
+        ]
+        if os.name == 'nt':  # Windows
+            search_paths.append("C:\\Program Files")
+            search_paths.append("C:\\Program Files (x86)")
+
+    cuda_dirs = {}
+    for path in search_paths:
+        if os.path.exists(path):  # check that the search root exists
+            for root, dirs, files in os.walk(path):  # walk recursively
+                for dir_name in dirs:
+                    match = re.search(r"cuda(\d+(\.\d+)*)", dir_name, re.IGNORECASE)  # regex for "cuda" plus a version
+                    if match:
+                        full_path = os.path.join(root, dir_name)
+                        version = match.group(1)
+                        cuda_dirs[full_path] = version
+    return cuda_dirs
+
+#subprocess.run(['sh', './torch.sh'])
+
+#import sys
+#conda_prefix = os.path.expanduser("~/miniconda3")
+#conda_bin = os.path.join(conda_prefix, "bin")
+
+# Add Conda's bin directory to your PATH
+#os.environ["PATH"] = conda_bin + os.pathsep + os.environ["PATH"]
+
+# Activate the base environment (adjust if needed)
+#os.system(f'{conda_bin}/conda init --all')
+#os.system(f'{conda_bin}/conda activate base')
+
+#os.system(f'{conda_bin}/conda install nvidia/label/cudnn-9.3.0::cudnn')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-libraries')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-libraries-dev')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-cudart')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-cudart-dev')
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-nvcc')
+
+#os.system(f'{conda_bin}/conda install nvidia/label/cuda-12.4.0::cuda-toolkit')
+
+#subprocess.run(['pip', 'install', 'git+https://github.com/hidet-org/hidet.git'])
+#subprocess.run(['pip', 'install', 'git+https://github.com/ford442/hidet.git@thread'])
+
+#os.system(f'{conda_bin}/conda install pytorch::pytorch-cuda')
+#os.system(f'{conda_bin}/conda install rcdr_py37::tensorrt')
+#subprocess.run(['sh', './hidet.sh'])
+#subprocess.run(['sh', './modelopt.sh'])
+#import hidet
+#print(dir(hidet))
+#import torch_tensorrt
89
  import uuid
90
  import gradio as gr
91
  import numpy as np
92
  from PIL import Image
93
+
94
+ import diffusers
95
+ from diffusers import AutoencoderKL, StableDiffusionXLPipeline
96
  from diffusers import EulerAncestralDiscreteScheduler
97
  from typing import Tuple
98
  import paramiko
99
  import datetime
100
+ import cyper
101
+ from image_gen_aux import UpscaleWithModel
102
+ import torch
103
+ #import torch._dynamo
104
+ #torch._dynamo.list_backends()
105
+ import time
106
+ import gc
107
+
108
+ import torch.nn.functional as F
109
+ from sageattention import sageattn
110
+
111
  torch.backends.cuda.matmul.allow_tf32 = False
112
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
113
  torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
114
  torch.backends.cudnn.allow_tf32 = False
115
  torch.backends.cudnn.deterministic = False
116
  torch.backends.cudnn.benchmark = False
117
+ # torch.backends.cuda.preferred_blas_library="cublas"
118
+ # torch.backends.cuda.preferred_linalg_library="cusolver"
119
  torch.set_float32_matmul_precision("highest")
120
 
 
 
 
 
121
 
122
  DESCRIPTIONXX = """
123
  ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester B) ⚡⚡⚡⚡
 
 DEFAULT_STYLE_NAME = "Style Zero"
 STYLE_NAMES = list(styles.keys())
 HF_TOKEN = os.getenv("HF_TOKEN")
+FTP_HOST = os.getenv("FTP_HOST")
+FTP_USER = os.getenv("FTP_USER")
+FTP_PASS = os.getenv("FTP_PASS")
+FTP_DIR = os.getenv("FTP_DIR")
 
+# os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1')
+os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
+
+upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
+
+def scheduler_swap_callback(pipeline, step_index, timestep, callback_kwargs):
+    # adjust the batch_size of prompt_embeds according to guidance_scale
+    if step_index == int(pipeline.num_timesteps * 0.1):
+        print("-- swapping scheduler --")
+        # pipeline.scheduler = euler_scheduler
+        torch.set_float32_matmul_precision("high")
+        # pipe.vae = vae_b
+        torch.backends.cudnn.allow_tf32 = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cuda.preferred_blas_library="cublaslt"
+        #if step_index == int(pipeline.num_timesteps * 0.5):
+        #    torch.set_float32_matmul_precision("medium")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.float64)
+        #pipe.unet.to(torch.float64)
+        # pipe.guidance_scale=1.0
+        # pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.1} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.1
+    if step_index == int(pipeline.num_timesteps * 0.9):
+        torch.backends.cuda.preferred_blas_library="cublas"
+        torch.backends.cudnn.allow_tf32 = False
+        torch.backends.cuda.matmul.allow_tf32 = False
+        torch.set_float32_matmul_precision("highest")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.bfloat16)
+        #pipe.unet.to(torch.float64)
+        # pipe.vae = vae_a
+        # pipe.unet = unet_a
+        torch.backends.cudnn.deterministic = False
+        #pipe.unet.set_default_attn_processor()
+        print("-- swapping scheduler --")
+        # pipeline.scheduler = heun_scheduler
+        #pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.9} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.9
+    return {"latents": callback_kwargs["latents"]}
+
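For context on the callback above: diffusers invokes callback_on_step_end(pipeline, step_index, timestep, callback_kwargs) after each denoising step and expects a dict back; returned keys that are registered callback tensor inputs (such as "latents") overwrite the loop's local variables. A minimal sketch of the contract, assuming an SDXL pipeline like the one built below:

def log_step_callback(pipeline, step_index, timestep, callback_kwargs):
    # Inspect the in-flight latents; returning them unchanged leaves the
    # denoising loop's state untouched.
    latents = callback_kwargs["latents"]
    print(f"step {step_index}: t={timestep}, latents {tuple(latents.shape)}")
    return {"latents": latents}

# image = pipe(prompt, callback_on_step_end=log_step_callback).images[0]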
 def load_and_prepare_model():
+    sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1 ,use_karras_sigmas=True)
+    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, device_map='cpu') #.to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
+    #vaeRV = AutoencoderKL.from_pretrained("SG161222/RealVisXL_V5.0", subfolder='vae', safety_checker=None, use_safetensors=False).to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
+    #txt_1 = CLIPTextModel.from_pretrained(device_map??)
+    #txt_2 = CLIPTextModel.from_pretrained(vae too?)
+    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
         #torch_dtype=torch.bfloat16,
         add_watermarker=False,
+        # low_cpu_mem_usage = False,
+        token = HF_TOKEN,
+        # scheduler = sched,
     )
+    #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1) #,use_karras_sigmas=True)
+    pipe.vae = vaeXL #.to(torch.bfloat16)
+    pipe.scheduler = sched
+
+    pipe.vae.do_resize = False
+    #pipe.vae.vae_scale_factor = 8
+    pipe.vae.do_convert_rgb = True
+
+    pipe.vae.set_default_attn_processor()
     #pipe.to(device)
     #pipe.to(torch.bfloat16)
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
+    pipe.safety_checker=None
+
+    ''' # Freeze vae and unet
+    pipe.vae.requires_grad_(False)
+    pipe.unet.requires_grad_(False)
+    pipe.text_encoder.requires_grad_(False)
+    pipe.unet.eval()
+    pipe.vae.eval()
+    pipe.text_encoder.eval()
+    '''
+    #pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
+    #pipe.load_lora_weights("ford442/sdxl-vae-bf16", weight_name="LoRA/FLUX-dev-lora-add_details.safetensors", low_cpu_mem_usage=False)
     #pipe.unet.to(memory_format=torch.channels_last)
     #pipe.enable_vae_tiling()
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, mode='max-autotune') #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, mode='max-autotune-no-cudagraphs') #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, options={'epilogue_fusion': True, 'shape_padding': True}) #.to(device=device, dtype=torch.bfloat16)
+    #pipe.unet = torch.compile(pipe.unet, dynamic=False)
+    #pipe.unet = torch.compile(pipe.unet, backend="hidet", dynamic=False, options={"search_space": 0})
+    #pipe.unet = torch.compile(pipe.unet, backend="torch_tensorrt", dynamic=False, options={"precision": torch.bfloat16, "optimization_level": 4,})
+    pipe.to(torch.device('cuda:0'), torch.bfloat16)
+
     return pipe
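Note the ordering at the end of load_and_prepare_model(): vaeXL is loaded in float32 on the CPU, but pipe.to(torch.device('cuda:0'), torch.bfloat16) casts every registered module, the swapped-in VAE included, to bf16. If the float32 VAE were meant to keep full precision, it would need recasting after the pipeline-wide move; a sketch:

# Hypothetical: keep the VAE in float32 after the pipeline-wide bf16 cast.
pipe.to(torch.device('cuda:0'), torch.bfloat16)
pipe.vae.to(device='cuda:0', dtype=torch.float32)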
+
+#hidet.option.parallel_build(False)
+#hidet.option.parallel_tune(2, 2.0)
+#torch._dynamo.config.suppress_errors = True
+#torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
+
+# more search
+#hidet.torch.dynamo_config.search_space(0)
+#hidet.torch.dynamo_config.dump_graph_ir("./local_graph")
+# hidet.option.cache_dir("local_cache")
+# automatically transform the model to use float16 data type
+#hidet.torch.dynamo_config.use_fp16(True)
+# use float16 data type as the accumulate data type in operators with reduction
+#hidet.torch.dynamo_config.use_fp16_reduction(True)
+# use tensorcore
+#hidet.torch.dynamo_config.use_tensor_core()
+#hidet.torch.dynamo_config.steal_weights(False)
+
+# Preload and compile both models
+
 pipe = load_and_prepare_model()
 
+MAX_SEED = np.iinfo(np.int64).max
 
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
 
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
+        destination_path = FTP_DIR + filename
         transport.connect(username = FTP_USER, password = FTP_PASS)
         sftp = paramiko.SFTPClient.from_transport(transport)
         sftp.put(filename, destination_path)
     except Exception as e:
         print(f"FTP upload error: {e}")
 
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
+    filename = f'rv_C_{timestamp}.txt'
     with open(filename, "w") as f:
+        f.write(f"Realvis 5.0 (Tester C) \n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
         f.write(f"Guidance Scale: {guidance_scale} \n")
         f.write(f"SPACE SETUP: \n")
         f.write(f"Model Scheduler: Euler_a all_custom before cuda \n")
+        f.write(f"Model VAE: sdxl-vae-bf16\n")
         f.write(f"To cuda and bfloat \n")
     upload_to_ftp(filename)
+
+code = r'''
+import torch
+import paramiko
+import os
+
+FTP_HOST = os.getenv("FTP_HOST")
+FTP_USER = os.getenv("FTP_USER")
+FTP_PASS = os.getenv("FTP_PASS")
+FTP_DIR = os.getenv("FTP_DIR")
+
+def scheduler_swap_callback(pipeline, step_index, timestep, callback_kwargs):
+    # adjust the batch_size of prompt_embeds according to guidance_scale
+    if step_index == int(pipeline.num_timesteps * 0.1):
+        print("-- swapping torch modes --")
+        # pipeline.scheduler = euler_scheduler
+        torch.set_float32_matmul_precision("high")
+        # pipe.vae = vae_b
+        torch.backends.cudnn.allow_tf32 = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cuda.preferred_blas_library="cublaslt"
+        #if step_index == int(pipeline.num_timesteps * 0.5):
+        #    torch.set_float32_matmul_precision("medium")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.float64)
+        #pipe.unet.to(torch.float64)
+        # pipe.guidance_scale=1.0
+        # pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.1} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.1
+    if step_index == int(pipeline.num_timesteps * 0.9):
+        torch.backends.cuda.preferred_blas_library="cublas"
+        torch.backends.cudnn.allow_tf32 = False
+        torch.backends.cuda.matmul.allow_tf32 = False
+        torch.set_float32_matmul_precision("highest")
+        #callback_kwargs["latents"] = callback_kwargs["latents"].to(torch.bfloat16)
+        #pipe.unet.to(torch.float64)
+        #pipeline.unet.set_default_attn_processor() ## custom ##
+        # pipe.vae = vae_a
+        # pipe.unet = unet_a
+        torch.backends.cudnn.deterministic = False
+        print("-- swapping torch modes --")
+        # pipeline.scheduler = heun_scheduler
+        #pipe.scheduler.set_timesteps(num_inference_steps*.70)
+        # print(f"-- setting step {pipeline.num_timesteps * 0.9} --")
+        # pipeline.scheduler._step_index = pipeline.num_timesteps * 0.9
+    return callback_kwargs
+
+def upload_to_ftp(filename):
+    try:
+        transport = paramiko.Transport((FTP_HOST, 22))
+        destination_path = FTP_DIR + filename
+        transport.connect(username = FTP_USER, password = FTP_PASS)
+        sftp = paramiko.SFTPClient.from_transport(transport)
+        sftp.put(filename, destination_path)
+        sftp.close()
+        transport.close()
+        print(f"Uploaded {filename} to FTP server")
+    except Exception as e:
+        print(f"FTP upload error: {e}")
+
+def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
+    filename = f'rv_B_{timestamp}.txt'
+    with open(filename, "w") as f:
+        f.write(f"Realvis 5.0 (Tester B) \n")
+        f.write(f"Date/time: {timestamp} \n")
+        f.write(f"Prompt: {prompt} \n")
+        f.write(f"Steps: {num_inference_steps} \n")
+        f.write(f"Guidance Scale: {guidance_scale} \n")
+        f.write(f"SPACE SETUP: \n")
+        f.write(f"Model VAE: sdxl-vae-bf16\n")
+        f.write(f"To cuda and bfloat \n")
+    return filename
+'''
+pyx = cyper.inline(code, fast_indexing=True, directives=dict(boundscheck=False, wraparound=False, language_level=3))
+
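cyper.inline compiles the source string above with Cython and returns a module-like object, so pyx.scheduler_swap_callback, pyx.upload_to_ftp, and pyx.uploadNote used below are the compiled counterparts of the plain-Python versions defined earlier. A minimal, self-contained illustration of the same mechanism (assuming only that the cyper package is installed):

import cyper

# Compile a tiny module from a source string; the directives mirror the call above.
mod = cyper.inline(
    "def square(x):\n    return x * x",
    directives=dict(language_level=3),
)
print(mod.square(7))  # -> 49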
 @spaces.GPU(duration=40)
 def generate_30(
     prompt: str,
 
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
+    sage: bool = False,
     use_resolution_binning: bool = True,
+    progress=gr.Progress(track_tqdm=True)
 ):
+    if sage==True:
+        F.scaled_dot_product_attention = sageattn
+    if sage==False:
+        F.scaled_dot_product_attention = F.scaled_dot_product_attention
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
 
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
+        "callback_on_step_end": pyx.scheduler_swap_callback,
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
     images = []
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = pyx.uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
+    pyx.upload_to_ftp(filename)
     batch_options = options.copy()
     rv_image = pipe(**batch_options).images[0]
+    sd_image_path = f"rv_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
+    pyx.upload_to_ftp(sd_image_path)
+    torch.set_float32_matmul_precision("medium")
+    with torch.no_grad():
+        upscale = upscaler(rv_image, tiling=True, tile_width=256, tile_height=256)
+    downscale1 = upscale.resize((upscale.width // 4, upscale.height // 4), Image.LANCZOS)
+    downscale_path = f"rv50_upscale_{timestamp}.png"
+    downscale1.save(downscale_path,optimize=False,compress_level=0)
+    pyx.upload_to_ftp(downscale_path)
     unique_name = str(uuid.uuid4()) + ".png"
     os.symlink(sd_image_path, unique_name)
     return [unique_name]
457
  height: int = 768,
458
  guidance_scale: float = 4,
459
  num_inference_steps: int = 125,
460
+ sage: bool = False,
461
  use_resolution_binning: bool = True,
462
+ progress=gr.Progress(track_tqdm=True)
463
  ):
464
+ if sage==True:
465
+ F.scaled_dot_product_attention = sageattn
466
+ if sage==False:
467
+ F.scaled_dot_product_attention = F.scaled_dot_product_attention
468
  seed = random.randint(0, MAX_SEED)
469
  generator = torch.Generator(device='cuda').manual_seed(seed)
 
 
470
  options = {
471
  "prompt": [prompt],
472
  "negative_prompt": [negative_prompt],
 
477
  "num_inference_steps": num_inference_steps,
478
  "generator": generator,
479
  "output_type": "pil",
480
+ "callback_on_step_end": pyx.scheduler_swap_callback,
481
  }
482
  if use_resolution_binning:
483
  options["use_resolution_binning"] = True
 
485
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
486
  uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
487
  batch_options = options.copy()
488
+ gc.collect()
489
+ torch.cuda.empty_cache()
490
+ time.sleep(2)
491
  rv_image = pipe(**batch_options).images[0]
492
+ sd_image_path = f"rv_B_{timestamp}.png"
493
  rv_image.save(sd_image_path,optimize=False,compress_level=0)
494
  upload_to_ftp(sd_image_path)
495
  unique_name = str(uuid.uuid4()) + ".png"
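The gc.collect() / torch.cuda.empty_cache() pair above returns cached, unoccupied CUDA memory to the driver between runs, and the time.sleep(2) gives the allocator a moment before the next large allocation. If the same pattern were wanted in every generator, it could be factored out (the free_gpu name is hypothetical):

import gc
import time
import torch

def free_gpu(pause: float = 2.0):
    # Drop unreferenced Python objects, release cached CUDA blocks back to
    # the driver, then pause briefly before the next large allocation.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    time.sleep(pause)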
 
     height: int = 768,
     guidance_scale: float = 4,
     num_inference_steps: int = 125,
+    sage: bool = False,
     use_resolution_binning: bool = True,
+    progress=gr.Progress(track_tqdm=True)
 ):
+    if sage==True:
+        F.scaled_dot_product_attention = sageattn
+    if sage==False:
+        F.scaled_dot_product_attention = F.scaled_dot_product_attention
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     options = {
         "prompt": [prompt],
         "negative_prompt": [negative_prompt],
 
         "num_inference_steps": num_inference_steps,
         "generator": generator,
         "output_type": "pil",
+        "callback_on_step_end": pyx.scheduler_swap_callback,
     }
     if use_resolution_binning:
         options["use_resolution_binning"] = True
 
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     uploadNote(prompt,num_inference_steps,guidance_scale,timestamp)
     batch_options = options.copy()
+    gc.collect()
+    torch.cuda.empty_cache()
+    time.sleep(2)
     rv_image = pipe(**batch_options).images[0]
+    sd_image_path = f"rv_B_{timestamp}.png"
     rv_image.save(sd_image_path,optimize=False,compress_level=0)
     upload_to_ftp(sd_image_path)
     unique_name = str(uuid.uuid4()) + ".png"
 
     run_button_30 = gr.Button("Run 30 Seconds", scale=0)
     run_button_60 = gr.Button("Run 60 Seconds", scale=0)
     run_button_90 = gr.Button("Run 90 Seconds", scale=0)
     result = gr.Gallery(label="Result", columns=1, show_label=False)
 
     with gr.Row():
     with gr.Row():
         width = gr.Slider(
             label="Width",
+            minimum=448,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
         )
         height = gr.Slider(
             label="Height",
+            minimum=448,
             maximum=MAX_IMAGE_SIZE,
             step=64,
             value=768,
 
     )
     num_inference_steps = gr.Slider(
         label="Number of inference steps",
+        minimum=10,
         maximum=1000,
+        step=10,
         value=180,
     )
+    options = [True, False]
+    sage = gr.Radio(
+        show_label=True,
+        container=True,
+        interactive=True,
+        choices=options,
+        value=False,
+        label="Use SageAttention: ",
+    )
 
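The Radio over [True, False] passes the selected choice straight through as a Python bool to the sage parameter; a gr.Checkbox would be the more conventional single-boolean control, e.g.:

# Equivalent boolean toggle using a checkbox instead of a two-choice radio.
sage = gr.Checkbox(label="Use SageAttention", value=False, interactive=True)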
     gr.Examples(
         examples=examples,
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
 
             height,
             guidance_scale,
             num_inference_steps,
+            sage,
         ],
         outputs=[result],
     )
+
     gr.Markdown("### REALVISXL V5.0")
     predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1())
 
 description = "Text Generator Application by ecarbo"
 
 if __name__ == "__main__":
+
+    cuda_directories = find_cuda_directories()
+
+    if cuda_directories:
+        print("Found CUDA directories:")
+        for directory, version in cuda_directories.items():
+            print(f"- {directory}: Version {version}")
+    else:
+        print("No CUDA directories found in the specified paths.")
+
+    # Example of how to find the "best" CUDA path (customize logic)
+    if cuda_directories:
+        # Simple example: just pick the first one. You might have more sophisticated selection criteria.
+        best_cuda_path = list(cuda_directories.keys())[0]
+        print(f"Using CUDA path: {best_cuda_path}")
+
     demo_interface = demo.queue(max_size=50) # Remove .launch() here
 
     text_gen_interface = gr.Interface(