StableDiffusion-3.5-Large

Running on Zero

App Files Files Community

ford442 committed on Jan 14

Commit

c0e4160

verified ·

1 Parent(s): 417aebf

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -164

app.py CHANGED Viewed

@@ -25,21 +25,17 @@ FTP_USER = "ford442"
 FTP_PASS = "GoogleBez12!"
 FTP_DIR = "1ink.us/stable_diff/"  # Remote directory on FTP server
-#torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
-#torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
-#torch.backends.cudnn.benchmark = False
 #torch.backends.cuda.preferred_blas_library="cublas"
 #torch.backends.cuda.preferred_linalg_library="cusolver"
 hftoken = os.getenv("HF_AUTH_TOKEN")
-#image_encoder_path = "google/siglip-so400m-patch14-384"
-#ipadapter_path = hf_hub_download(repo_id="InstantX/SD3.5-Large-IP-Adapter", filename="ip-adapter.bin")
-#model_path = 'ford442/stable-diffusion-3.5-medium-bf16'
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
@@ -66,7 +62,7 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
   #  text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
     #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
     #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
-    tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
     torch_dtype=torch.bfloat16,
     #use_safetensors=False,
 )
@@ -77,90 +73,77 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
 pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
-upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
-def filter_text(text,phraseC):
-  """Filters out the text up to and including 'Rewritten Prompt:'."""
-  phrase = "Rewritten Prompt:"
-  phraseB = "rewritten text:"
-  pattern = f"(.*?){re.escape(phrase)}(.*)"
-  patternB = f"(.*?){re.escape(phraseB)}(.*)"
-  #  matchB = re.search(patternB, text)
-  matchB = re.search(patternB, text, flags=re.DOTALL)
-  if matchB:
-        filtered_text = matchB.group(2)
-        match = re.search(pattern, filtered_text, flags=re.DOTALL)
-        if match:
-          filtered_text = match.group(2)
-          filtered_text = re.sub(phraseC, "", filtered_text, flags=re.DOTALL)  # Replaces the matched pattern with an empty string
-          return filtered_text
-        else:
-          return filtered_text
-  else:
-        # Handle the case where no match is found
-        return text
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
-@spaces.GPU(duration=90)
-def infer(
     prompt,
     negative_prompt_1,
     negative_prompt_2,
     negative_prompt_3,
-    seed,
-    randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
-    expanded,
-    latent_file,  # Add latents file input
     progress=gr.Progress(track_tqdm=True),
 ):
-    upscaler_2.to(torch.device('cpu'))
     torch.set_float32_matmul_precision("highest")
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
-    enhanced_prompt = prompt
-    enhanced_prompt_2 = prompt
-    if latent_file:  # Check if a latent file is provided
-      #  initial_latents = pipe.prepare_latents(
-      #      batch_size=1,
-      #      num_channels_latents=pipe.transformer.in_channels,
-      #      height=pipe.transformer.config.sample_size[0],
-       #     width=pipe.transformer.config.sample_size[1],
-      #      dtype=pipe.transformer.dtype,
-      #      device=pipe.device,
-      #      generator=generator,
-      #  )
-        sd_image_a = Image.open(latent_file.name)
-        print("-- using image file --")
-        print('-- generating image --')
-        #with torch.no_grad():
-        sd_image = pipe(
-            prompt=enhanced_prompt,  # This conversion is fine
             negative_prompt=negative_prompt_1,
             guidance_scale=guidance_scale,
             num_inference_steps=num_inference_steps,
             width=width,
             height=height,
-            latents=sd_image_a,
-            generator=generator
-        ).images[0]
-        rv_path = f"sd35_{seed}.png"
-        sd_image[0].save(rv_path,optimize=False,compress_level=0)
-        upload_to_ftp(rv_path)
-    else:
-        print('-- generating image --')
-        #with torch.no_grad():
-        sd_image = pipe(
-            prompt=prompt,  # This conversion is fine
-            prompt_2=enhanced_prompt_2,
-            prompt_3=enhanced_prompt,
             negative_prompt=negative_prompt_1,
             negative_prompt_2=negative_prompt_2,
             negative_prompt_3=negative_prompt_3,
@@ -168,93 +151,123 @@ def infer(
             num_inference_steps=num_inference_steps,
             width=width,
             height=height,
-         #   latents=None,
-          #  output_type='latent',
             generator=generator,
             max_sequence_length=512
-        ).images[0]
-        print('-- got image --')
-        #sd35_image = pipe.vae.decode(sd_image / 0.18215).sample
-       # sd35_image = sd35_image.cpu().permute(0, 2, 3, 1).float().detach().numpy()
-       # sd35_image = (sd35_image * 255).round().astype("uint8")
-       # image_pil = Image.fromarray(sd35_image[0])
-      #  sd35_path = f"sd35_{seed}.png"
-       # image_pil.save(sd35_path,optimize=False,compress_level=0)
-       # upload_to_ftp(sd35_path)
-        sd35_path = f"sd35l_{seed}.png"
-        sd_image.save(sd35_path,optimize=False,compress_level=0)
-        upload_to_ftp(sd35_path)
-        # Convert the generated image to a tensor
-    #generated_image_tensor = torch.tensor([np.array(sd_image).transpose(2, 0, 1)]).to('cuda') / 255.0
-    # Encode the generated image into latents
-    #with torch.no_grad():
-    #    generated_latents = pipe.vae.encode(generated_image_tensor.to(torch.bfloat16)).latent_dist.sample().mul_(0.18215)
-    #latent_path = f"sd35m_{seed}.pt"
-    # Save the latents to a .pt file
-    #torch.save(generated_latents, latent_path)
-    #upload_to_ftp(latent_path)
     #  pipe.unet.to('cpu')
     upscaler_2.to(torch.device('cuda'))
     with torch.no_grad():
         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
     print('-- got upscaled image --')
-    #upscaler_2.to(torch.device('cpu'))
     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
     upscale_path = f"sd35l_upscale_{seed}.png"
     downscale2.save(upscale_path,optimize=False,compress_level=0)
     upload_to_ftp(upscale_path)
     return sd_image, seed, enhanced_prompt
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-body{
-  background-color: blue;
-}
-"""
-def repeat_infer(
     prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
     width,
     height,
     guidance_scale,
     num_inference_steps,
-    num_iterations,  # New input for number of iterations
 ):
-    i = 0
-    while i < num_iterations:
-        time.sleep(700)  # Wait for 10 minutes (600 seconds)
-        result, seed, image_path, enhanced_prompt = infer(
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        )
-        # Optionally, you can add logic here to process the results of each iteration
-        # For example, you could display the image, save it with a different name, etc.
-        i += 1
-    return result, seed, image_path, enhanced_prompt
 with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Large")
-        expanded_prompt_output = gr.Textbox(label="Expanded Prompt", lines=5)  # Add this line
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
@@ -263,19 +276,12 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
                 placeholder="Enter your prompt",
                 container=False,
             )
-            options = [True, False]
-            expanded = gr.Radio(
-                show_label=True,
-                container=True,
-                interactive=True,
-                choices=options,
-                value=True,
-                label="Use expanded prompt: ",
-            )
-            run_button = gr.Button("Run", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=True):
-            latent_file = gr.File(label="Image File (optional)")  # Add latents file input
             negative_prompt_1 = gr.Text(
                 label="Negative prompt 1",
                 max_lines=1,
@@ -300,60 +306,92 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             num_iterations = gr.Number(
                 value=1000,
                 label="Number of Iterations")
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=768,  # Replace with defaults that work for your model
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
-                    value=768,  # Replace with defaults that work for your model
                 )
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
                     maximum=30.0,
                     step=0.1,
-                    value=4.2,  # Replace with defaults that work for your model
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
                     maximum=500,
                     step=1,
-                    value=220,  # Replace with defaults that work for your model
                 )
-            gr.Examples(examples=examples, inputs=[prompt])
         gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
         inputs=[
             prompt,
             negative_prompt_1,
             negative_prompt_2,
             negative_prompt_3,
-            seed,
-            randomize_seed,
             width,
             height,
             guidance_scale,
             num_inference_steps,
-            expanded,
-            latent_file,  # Add latent_file to the inputs
         ],
         outputs=[result, seed, expanded_prompt_output],
         )

 FTP_PASS = "GoogleBez12!"
 FTP_DIR = "1ink.us/stable_diff/"  # Remote directory on FTP server
+torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
+torch.backends.cudnn.allow_tf32 = False
 torch.backends.cudnn.deterministic = False
+torch.backends.cudnn.benchmark = False
 #torch.backends.cuda.preferred_blas_library="cublas"
 #torch.backends.cuda.preferred_linalg_library="cusolver"
 hftoken = os.getenv("HF_AUTH_TOKEN")
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
   #  text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True),
     #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
     #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
+    #tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
     torch_dtype=torch.bfloat16,
     #use_safetensors=False,
 )
 pipe.to(device)
 #pipe.to(device=device, dtype=torch.bfloat16)
+upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device('cpu'))
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 4096
@spaces.GPU(duration=30)
def infer_30(
    prompt,
    negative_prompt_1,
    negative_prompt_2,
    negative_prompt_3,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with a fresh random seed, then upscale and upload it.

    The function is registered with ``spaces.GPU(duration=30)``.

    Args:
        prompt: Text prompt; the same string is passed to the pipeline as
            ``prompt``, ``prompt_2`` and ``prompt_3``.
        negative_prompt_1: Passed to the pipeline as ``negative_prompt``.
        negative_prompt_2: Passed to the pipeline as ``negative_prompt_2``.
        negative_prompt_3: Passed to the pipeline as ``negative_prompt_3``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        guidance_scale: Guidance scale, forwarded to the pipeline.
        num_inference_steps: Number of denoising steps, forwarded to the
            pipeline.
        progress: Gradio progress tracker created with ``track_tqdm=True``;
            it is not referenced directly in the body.

    Returns:
        A ``(image, seed, prompt)`` tuple: the generated (non-upscaled) image,
        the random seed that was used, and the prompt as given.
    """
    torch.set_float32_matmul_precision("highest")

    # A new seed is drawn on every call; it also names the output files.
    seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device='cuda').manual_seed(seed)

    print('-- generating image --')
    sd_image = pipe(
        prompt=prompt,
        prompt_2=prompt,
        prompt_3=prompt,
        negative_prompt=negative_prompt_1,
        negative_prompt_2=negative_prompt_2,
        negative_prompt_3=negative_prompt_3,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
        max_sequence_length=512,
    ).images[0]
    print('-- got image --')

    sd35_path = f"sd35l_{seed}.png"
    sd_image.save(sd35_path, optimize=False, compress_level=0)
    upload_to_ftp(sd35_path)

    # The upscaler is moved to the GPU only here, right before it is used.
    upscaler_2.to(torch.device('cuda'))
    with torch.no_grad():
        upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
    print('-- got upscaled image --')

    # Each dimension of the upscaled image is divided by 4 before saving.
    downscale2 = upscale2.resize(
        (upscale2.width // 4, upscale2.height // 4),
        Image.LANCZOS,
    )
    upscale_path = f"sd35l_upscale_{seed}.png"
    downscale2.save(upscale_path, optimize=False, compress_level=0)
    upload_to_ftp(upscale_path)

    # No prompt-expansion step exists in this function, so the third output is
    # the prompt itself. The former `enhanced_prompt` name is never assigned
    # here and returning it raised NameError.
    return sd_image, seed, prompt
+@spaces.GPU(duration=60)
+def infer_60(
+    prompt,
+    negative_prompt_1,
+    negative_prompt_2,
+    negative_prompt_3,
+    width,
+    height,
+    guidance_scale,
+    num_inference_steps,
+    progress=gr.Progress(track_tqdm=True),
+):
+    torch.set_float32_matmul_precision("highest")
+    seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device='cuda').manual_seed(seed)
+    print('-- generating image --')
+    sd_image = pipe(
+            prompt=prompt,
+            prompt_2=prompt,
+            prompt_3=prompt,
             negative_prompt=negative_prompt_1,
             negative_prompt_2=negative_prompt_2,
             negative_prompt_3=negative_prompt_3,
             num_inference_steps=num_inference_steps,
             width=width,
             height=height,
             generator=generator,
             max_sequence_length=512
+    ).images[0]
+    print('-- got image --')
+    sd35_path = f"sd35l_{seed}.png"
+    sd_image.save(sd35_path,optimize=False,compress_level=0)
+    upload_to_ftp(sd35_path)
     #  pipe.unet.to('cpu')
     upscaler_2.to(torch.device('cuda'))
     with torch.no_grad():
         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
     print('-- got upscaled image --')
     downscale2 = upscale2.resize((upscale2.width // 4, upscale2.height // 4),Image.LANCZOS)
     upscale_path = f"sd35l_upscale_{seed}.png"
     downscale2.save(upscale_path,optimize=False,compress_level=0)
     upload_to_ftp(upscale_path)
     return sd_image, seed, enhanced_prompt
@spaces.GPU(duration=90)
def infer_90(
    prompt,
    negative_prompt_1,
    negative_prompt_2,
    negative_prompt_3,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with a fresh random seed, then upscale and upload it.

    The function is registered with ``spaces.GPU(duration=90)``.

    Args:
        prompt: Text prompt; the same string is passed to the pipeline as
            ``prompt``, ``prompt_2`` and ``prompt_3``.
        negative_prompt_1: Passed to the pipeline as ``negative_prompt``.
        negative_prompt_2: Passed to the pipeline as ``negative_prompt_2``.
        negative_prompt_3: Passed to the pipeline as ``negative_prompt_3``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        guidance_scale: Guidance scale, forwarded to the pipeline.
        num_inference_steps: Number of denoising steps, forwarded to the
            pipeline.
        progress: Gradio progress tracker created with ``track_tqdm=True``;
            it is not referenced directly in the body.

    Returns:
        A ``(image, seed, prompt)`` tuple: the generated (non-upscaled) image,
        the random seed that was used, and the prompt as given.
    """
    torch.set_float32_matmul_precision("highest")

    # A new seed is drawn on every call; it also names the output files.
    seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device='cuda').manual_seed(seed)

    print('-- generating image --')
    sd_image = pipe(
        prompt=prompt,
        prompt_2=prompt,
        prompt_3=prompt,
        negative_prompt=negative_prompt_1,
        negative_prompt_2=negative_prompt_2,
        negative_prompt_3=negative_prompt_3,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
        max_sequence_length=512,
    ).images[0]
    print('-- got image --')

    sd35_path = f"sd35l_{seed}.png"
    sd_image.save(sd35_path, optimize=False, compress_level=0)
    upload_to_ftp(sd35_path)

    # The upscaler is moved to the GPU only here, right before it is used.
    upscaler_2.to(torch.device('cuda'))
    with torch.no_grad():
        upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
    print('-- got upscaled image --')

    # Each dimension of the upscaled image is divided by 4 before saving.
    downscale2 = upscale2.resize(
        (upscale2.width // 4, upscale2.height // 4),
        Image.LANCZOS,
    )
    upscale_path = f"sd35l_upscale_{seed}.png"
    downscale2.save(upscale_path, optimize=False, compress_level=0)
    upload_to_ftp(upscale_path)

    # No prompt-expansion step exists in this function, so the third output is
    # the prompt itself. The former `enhanced_prompt` name is never assigned
    # here and returning it raised NameError.
    return sd_image, seed, prompt
@spaces.GPU(duration=100)
def infer_100(
    prompt,
    negative_prompt_1,
    negative_prompt_2,
    negative_prompt_3,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with a fresh random seed, then upscale and upload it.

    The function is registered with ``spaces.GPU(duration=100)``.

    Args:
        prompt: Text prompt; the same string is passed to the pipeline as
            ``prompt``, ``prompt_2`` and ``prompt_3``.
        negative_prompt_1: Passed to the pipeline as ``negative_prompt``.
        negative_prompt_2: Passed to the pipeline as ``negative_prompt_2``.
        negative_prompt_3: Passed to the pipeline as ``negative_prompt_3``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        guidance_scale: Guidance scale, forwarded to the pipeline.
        num_inference_steps: Number of denoising steps, forwarded to the
            pipeline.
        progress: Gradio progress tracker created with ``track_tqdm=True``;
            it is not referenced directly in the body.

    Returns:
        A ``(image, seed, prompt)`` tuple: the generated (non-upscaled) image,
        the random seed that was used, and the prompt as given.
    """
    torch.set_float32_matmul_precision("highest")

    # A new seed is drawn on every call; it also names the output files.
    seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device='cuda').manual_seed(seed)

    print('-- generating image --')
    sd_image = pipe(
        prompt=prompt,
        prompt_2=prompt,
        prompt_3=prompt,
        negative_prompt=negative_prompt_1,
        negative_prompt_2=negative_prompt_2,
        negative_prompt_3=negative_prompt_3,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
        max_sequence_length=512,
    ).images[0]
    print('-- got image --')

    sd35_path = f"sd35l_{seed}.png"
    sd_image.save(sd35_path, optimize=False, compress_level=0)
    upload_to_ftp(sd35_path)

    # The upscaler is moved to the GPU only here, right before it is used.
    upscaler_2.to(torch.device('cuda'))
    with torch.no_grad():
        upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
    print('-- got upscaled image --')

    # Each dimension of the upscaled image is divided by 4 before saving.
    downscale2 = upscale2.resize(
        (upscale2.width // 4, upscale2.height // 4),
        Image.LANCZOS,
    )
    upscale_path = f"sd35l_upscale_{seed}.png"
    downscale2.save(upscale_path, optimize=False, compress_level=0)
    upload_to_ftp(upscale_path)

    # No prompt-expansion step exists in this function, so the third output is
    # the prompt itself. The former `enhanced_prompt` name is never assigned
    # here and returning it raised NameError.
    return sd_image, seed, prompt
+css = """
+#col-container {margin: 0 auto;max-width: 640px;}
+body{background-color: blue;}
+"""
 with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(" # Text-to-Image StableDiffusion 3.5 Large")
+        expanded_prompt_output = gr.Textbox(label="Prompt", lines=1)  # Add this line
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 placeholder="Enter your prompt",
                 container=False,
             )
+            run_button_30 = gr.Button("Run 30", scale=0, variant="primary")
+            run_button_60 = gr.Button("Run 60", scale=0, variant="primary")
+            run_button_90 = gr.Button("Run 90", scale=0, variant="primary")
+            run_button_100 = gr.Button("Run 100", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=True):
             negative_prompt_1 = gr.Text(
                 label="Negative prompt 1",
                 max_lines=1,
             num_iterations = gr.Number(
                 value=1000,
                 label="Number of Iterations")
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=768,
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
                     step=32,
+                    value=768,
                 )
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
                     maximum=30.0,
                     step=0.1,
+                    value=4.2,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
                     maximum=500,
                     step=1,
+                    value=50,
                 )
         gr.on(
+        triggers=[run_button_30.click, prompt.submit],
+        fn=infer_30,
+        inputs=[
+            prompt,
+            negative_prompt_1,
+            negative_prompt_2,
+            negative_prompt_3,
+            width,
+            height,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[result, seed, expanded_prompt_output],
+        )
+        gr.on(
+        triggers=[run_button_60.click, prompt.submit],
+        fn=infer_60,
+        inputs=[
+            prompt,
+            negative_prompt_1,
+            negative_prompt_2,
+            negative_prompt_3,
+            width,
+            height,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[result, seed, expanded_prompt_output],
+        )
+        gr.on(
+        triggers=[run_button_90.click, prompt.submit],
+        fn=infer_90,
+        inputs=[
+            prompt,
+            negative_prompt_1,
+            negative_prompt_2,
+            negative_prompt_3,
+            width,
+            height,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[result, seed, expanded_prompt_output],
+        )
+        gr.on(
+        triggers=[run_button_100.click, prompt.submit],
+        fn=infer_100,
         inputs=[
             prompt,
             negative_prompt_1,
             negative_prompt_2,
             negative_prompt_3,
             width,
             height,
             guidance_scale,
             num_inference_steps,
         ],
         outputs=[result, seed, expanded_prompt_output],
         )