Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,14 +1,10 @@
 import spaces
 import gradio as gr
 import numpy as np
-#import tensorrt as trt
 import random
 import torch
-from diffusers import StableDiffusion3Pipeline
-from transformers import
-#from threading import Thread
-#from transformers import pipeline
-from transformers import T5Tokenizer, T5ForConditionalGeneration
+from diffusers import StableDiffusion3Pipeline
+#from transformers import CLIPTextModelWithProjection, T5EncoderModel
 import re
 import paramiko
 import urllib
@@ -16,6 +12,8 @@ import time
 import os
 from image_gen_aux import UpscaleWithModel
 from huggingface_hub import hf_hub_download
+
+#from diffusers import SD3Transformer2DModel, AutoencoderKL
 #from models.transformer_sd3 import SD3Transformer2DModel
 #from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline
 from PIL import Image
@@ -25,13 +23,13 @@ FTP_USER = "ford442"
 FTP_PASS = "GoogleBez12!"
 FTP_DIR = "1ink.us/stable_diff/" # Remote directory on FTP server
 
-torch.backends.cuda.matmul.allow_tf32 = False
+#torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
-torch.backends.cudnn.allow_tf32 = False
+#torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
 #torch.backends.cudnn.benchmark = False
-torch.backends.cuda.preferred_blas_library="cublas"
+#torch.backends.cuda.preferred_blas_library="cublas"
 #torch.backends.cuda.preferred_linalg_library="cusolver"
 
 hftoken = os.getenv("HF_AUTH_TOKEN")
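Note: this hunk comments out the explicit TF32 and BLAS-backend overrides and keeps only the reduced-precision reduction flags. A minimal sketch of what the surviving flags control (the final print is illustrative, not part of app.py):

import torch

# Disallow low-precision accumulation of partial sums in bf16/fp16 matmuls,
# trading some GEMM throughput for numerical accuracy.
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False

# With the explicit overrides commented out, TF32 behavior falls back to the
# PyTorch defaults, which can be inspected at runtime:
print(torch.backends.cuda.matmul.allow_tf32, torch.backends.cudnn.allow_tf32)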
@@ -56,57 +54,26 @@ def upload_to_ftp(filename):
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 torch_dtype = torch.bfloat16
 
-#checkpoint = "microsoft/Phi-3.5-mini-instruct"
-#vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
-#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
-#vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16")
-#vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #, device_map='cpu') #.to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
-
 pipe = StableDiffusion3Pipeline.from_pretrained(
-    #"stabilityai
+    #"stabilityai # stable-diffusion-3.5-large",
     "ford442/stable-diffusion-3.5-large-bf16",
-    # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-
-
-
+    # vae=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", use_safetensors=True, subfolder='vae',token=True),
+    #scheduler = FlowMatchHeunDiscreteScheduler.from_pretrained('ford442/stable-diffusion-3.5-large-bf16', subfolder='scheduler',token=True),
+    # text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True),
     # text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True),
     # text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
+    tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True)
     token=True,
+    torch_dtype=torch.bfloat16,
     #use_safetensors=False,
 )
 
-pipe.to(device=device, dtype=torch.bfloat16)
-
-#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/stable-diffusion-3.5-medium-bf16").to(torch.device("cuda:0"))
-#pipe = StableDiffusion3Pipeline.from_pretrained("ford442/RealVis_Medium_1.0b_bf16", torch_dtype=torch.bfloat16)
-#pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3.5-medium", token=hftoken, torch_dtype=torch.float32, device_map='balanced')
-
-# pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
-
-#pipe.scheduler.config.requires_aesthetics_score = False
-#pipe.enable_model_cpu_offload()
-#pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
-#pipe = torch.compile(pipe)
-# pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, beta_schedule="scaled_linear")
-
-#refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("ford442/stable-diffusion-xl-refiner-1.0-bf16",vae = vaeXL, requires_aesthetics_score=True) #.to(torch.bfloat16)
-#refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", vae=vae, torch_dtype=torch.float32, requires_aesthetics_score=True, device_map='balanced')
-#refiner.scheduler=EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
-#refiner.enable_model_cpu_offload()
 
+#pipe.enable_model_cpu_offload()
+pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
 
-#refiner.scheduler.config.requires_aesthetics_score=False
-#refiner.to(device)
-#refiner = torch.compile(refiner)
-#refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config, beta_schedule="scaled_linear")
-#refiner.scheduler = EulerAncestralDiscreteScheduler.from_config(refiner.scheduler.config)
-
-#tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=True)
-#tokenizer.tokenizer_legacy=False
-#model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
-#model = torch.compile(model)
-
 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
 
 def filter_text(text,phraseC):
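As committed, this from_pretrained call appears broken: the new tokenizer=CLIPTokenizer.from_pretrained(...) argument has no trailing comma before token=True, and CLIPTokenizer is never imported (the transformers import at the top of the file is commented out). A corrected sketch under those assumptions, not the commit itself:

import torch
from transformers import CLIPTokenizer
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "ford442/stable-diffusion-3.5-large-bf16",
    tokenizer=CLIPTokenizer.from_pretrained(
        "ford442/stable-diffusion-3.5-large-bf16",
        add_prefix_space=True,
        subfolder="tokenizer",
        token=True,
    ),  # note the trailing comma the commit omits
    token=True,
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")  # the commit's pipe.to(device), with device resolved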
@@ -153,62 +120,10 @@ def infer(
     torch.set_float32_matmul_precision("highest")
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-
-    if expanded:
-        system_prompt_rewrite = (
-            "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
-        )
-        user_prompt_rewrite = (
-            "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
-        )
-        user_prompt_rewrite_2 = (
-            "Rephrase this scene to have more elaborate details: "
-        )
-        input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
-        input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
-        print("-- got prompt --")
-        # Encode the input text and include the attention mask
-        encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
-        encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
-        # Ensure all values are on the correct device
-        input_ids = encoded_inputs["input_ids"].to(device)
-        input_ids_2 = encoded_inputs_2["input_ids"].to(device)
-        attention_mask = encoded_inputs["attention_mask"].to(device)
-        attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
-        print("-- tokenize prompt --")
-        # Google T5
-        #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_new_tokens=512,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        outputs_2 = model.generate(
-            input_ids=input_ids_2,
-            attention_mask=attention_mask_2,
-            max_new_tokens=65,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-        )
-        # Use the encoded tensor 'text_inputs' here
-        enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
-        print('-- generated prompt --')
-        enhanced_prompt = filter_text(enhanced_prompt,prompt)
-        enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
-        print('-- filtered prompt --')
-        print(enhanced_prompt)
-        print('-- filtered prompt 2 --')
-        print(enhanced_prompt_2)
-    else:
-    '''
+
     enhanced_prompt = prompt
     enhanced_prompt_2 = prompt
-
+
     if latent_file: # Check if a latent file is provided
         # initial_latents = pipe.prepare_latents(
         #     batch_size=1,
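The deleted branch was the expanded prompt-rewriting path; it depended on module-level tokenizer and model objects whose T5 setup this commit also removes, so after the change every prompt passes through unchanged. For reference, a standalone sketch of the removed rewrite step, assuming a seq2seq T5 checkpoint (the model name here is illustrative, not from the commit):

from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large").to("cuda")

def expand_prompt(prompt: str) -> str:
    # Mirrors the removed code: instruction plus user prompt, sampled decoding.
    text = ("You are an AI assistant that rewrites image prompts to be more "
            "descriptive and detailed. Rewrite this prompt to be more descriptive "
            f"and detailed and only return the rewritten text: {prompt}")
    enc = tokenizer(text, return_tensors="pt", return_attention_mask=True).to("cuda")
    out = model.generate(**enc, max_new_tokens=512, temperature=0.2,
                         top_p=0.9, do_sample=True)
    return tokenizer.decode(out[0], skip_special_tokens=True)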
@@ -263,7 +178,7 @@ def infer(
     # sd35_path = f"sd35_{seed}.png"
     # image_pil.save(sd35_path,optimize=False,compress_level=0)
     # upload_to_ftp(sd35_path)
-    sd35_path = f"
+    sd35_path = f"sd35l_{seed}.png"
     sd_image.save(sd35_path,optimize=False,compress_level=0)
     upload_to_ftp(sd35_path)
     # Convert the generated image to a tensor
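The body of upload_to_ftp is outside this diff; given the paramiko import and the FTP_* constants, a plausible SFTP-based reconstruction follows. FTP_HOST and the whole implementation are assumptions, not visible in the commit:

import os
import paramiko

def upload_to_ftp(filename):
    # Hypothetical reconstruction: SSH transport plus SFTP put into FTP_DIR.
    transport = paramiko.Transport((FTP_HOST, 22))  # FTP_HOST assumed defined elsewhere
    transport.connect(username=FTP_USER, password=FTP_PASS)
    sftp = paramiko.SFTPClient.from_transport(transport)
    sftp.put(filename, os.path.join(FTP_DIR, os.path.basename(filename)))
    sftp.close()
    transport.close()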
@@ -275,31 +190,14 @@ def infer(
     # Save the latents to a .pt file
     #torch.save(generated_latents, latent_path)
     #upload_to_ftp(latent_path)
-    #
-    '''
-    pipe.to(torch.device('cpu'))
-    refiner.to(device=device, dtype=torch.bfloat16)
-    refine = refiner(
-        prompt=f"{enhanced_prompt_2}, high quality masterpiece, complex details",
-        negative_prompt = negative_prompt_1,
-        negative_prompt_2 = negative_prompt_2,
-        guidance_scale=7.5,
-        num_inference_steps=num_inference_steps,
-        image=sd_image,
-        generator=generator,
-    ).images[0]
-    refine_path = f"sd35_refine_{seed}.png"
-    refine.save(refine_path,optimize=False,compress_level=0)
-    upload_to_ftp(refine_path)
-    refiner.to(torch.device('cpu'))
-    '''
+    # pipe.unet.to('cpu')
     upscaler_2.to(torch.device('cuda'))
     with torch.no_grad():
         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
     print('-- got upscaled image --')
-    upscaler_2.to(torch.device('cpu'))
+    #upscaler_2.to(torch.device('cpu'))
     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
-    upscale_path = f"
+    upscale_path = f"sd35l_upscale_{seed}.png"
     downscale2.save(upscale_path,optimize=False,compress_level=0)
     upload_to_ftp(upscale_path)
     return sd_image, seed, enhanced_prompt
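The surviving finishing pass runs the ClearRealityV1 upscaler in 256px tiles, then LANCZOS-downscales the result (the // 4 implies a 4x model), so it acts as a detail-cleanup filter at the original resolution rather than an enlargement. Note the commit also comments out moving upscaler_2 back to CPU, so it stays resident on the GPU between calls. A condensed sketch reusing the upscaler_2, sd_image, and seed names from app.py:

import torch
from PIL import Image

with torch.no_grad():
    # Tiled inference keeps peak VRAM bounded regardless of image size.
    upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
# Divide by the model's assumed 4x scale factor to return to the source size.
downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4), Image.LANCZOS)
downscale2.save(f"sd35l_upscale_{seed}.png", optimize=False, compress_level=0)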