Ryukijano committed
Commit 16c45c8 · verified · 1 Parent(s): b2b60ba

Update app.py

Files changed (1):
  1. app.py +13 -68
app.py CHANGED
@@ -67,74 +67,17 @@ def generate_image(
 
     start_time = time.time()
 
-    # Initialize static inputs for CUDA graph
-    static_latents = torch.randn(
-        (1, 4, height // 8, width // 8), dtype=dtype, device="cuda"
-    )
-    static_prompt_embeds = torch.randn(
-        (2, 77, 768), dtype=dtype, device="cuda"
-    )  # Adjust dimensions as needed
-    static_pooled_prompt_embeds = torch.randn(
-        (2, 768), dtype=dtype, device="cuda"
-    )  # Adjust dimensions as needed
-    static_text_ids = torch.tensor([[[1, 2, 3]]], dtype=torch.int32, device="cuda")
-    static_latent_image_ids = torch.tensor([1], dtype=torch.int64, device="cuda")
-    static_timestep = torch.tensor([999], dtype=dtype, device="cuda")
-
-    # Warmup
-    s = torch.cuda.Stream()
-    s.wait_stream(torch.cuda.current_stream())
-    with torch.cuda.stream(s):
-        for _ in range(3):
-            _ = pipe.transformer(
-                hidden_states=static_latents,
-                timestep=static_timestep / 1000,
-                guidance=None,
-                pooled_projections=static_pooled_prompt_embeds,
-                encoder_hidden_states=static_prompt_embeds,
-                txt_ids=static_text_ids,
-                img_ids=static_latent_image_ids,
-                return_dict=False,
-            )
-    torch.cuda.current_stream().wait_stream(s)
-
-    # Capture CUDA Graph
-    g = torch.cuda.CUDAGraph()
-    with torch.cuda.graph(g):
-        static_noise_pred = pipe.transformer(
-            hidden_states=static_latents,
-            timestep=static_timestep / 1000,
-            guidance=None,
-            pooled_projections=static_pooled_prompt_embeds,
-            encoder_hidden_states=static_prompt_embeds,
-            txt_ids=static_text_ids,
-            img_ids=static_latent_image_ids,
-            return_dict=False,
-        )[0]
-        static_latents_out = pipe.scheduler.step(
-            static_noise_pred, static_timestep, static_latents, return_dict=False
-        )[0]
-        static_output = pipe._decode_latents_to_image(
-            static_latents_out, height, width, "pil"
-        )
-
-    # Graph-based generation function
-    def generate_with_graph(
-        latents,
-        prompt_embeds,
-        pooled_prompt_embeds,
-        text_ids,
-        latent_image_ids,
-        timestep,
-    ):
-        static_latents.copy_(latents)
-        static_prompt_embeds.copy_(prompt_embeds)
-        static_pooled_prompt_embeds.copy_(pooled_prompt_embeds)
-        static_text_ids.copy_(text_ids)
-        static_latent_image_ids.copy_(latent_image_ids)
-        static_timestep.copy_(timestep)
-        g.replay()
-        return static_output
+    # Dynamically determine shapes based on input width/height
+    latents_shape = (1, 4, height // 8, width // 8)
+    prompt_embeds_shape = (
+        1,
+        pipe.transformer.text_encoder.config.max_position_embeddings,
+        pipe.transformer.text_encoder.config.hidden_size,
+    )
+    pooled_prompt_embeds_shape = (
+        1,
+        pipe.transformer.text_encoder.config.hidden_size,
+    )
 
     # Only generate the last image in the sequence
     img = pipe.generate_images(
@@ -143,7 +86,9 @@ def generate_image(
         height=height,
         num_inference_steps=num_inference_steps,
         generator=generator,
-        generate_with_graph=generate_with_graph,
+        latents_shape=latents_shape,
+        prompt_embeds_shape=prompt_embeds_shape,
+        pooled_prompt_embeds_shape=pooled_prompt_embeds_shape
    )
    latency = f"Latency: {(time.time()-start_time):.2f} seconds"
    return img, seed, latency
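For reference, the removed block followed PyTorch's standard CUDA Graph capture/replay recipe: warm up on a side stream, capture one step into a torch.cuda.CUDAGraph, then replay the recorded kernels after copying fresh inputs into the captured static buffers. Graph capture requires fixed tensor shapes and addresses, which is why the hard-coded static tensors above existed. A minimal runnable sketch of that pattern, using a stand-in nn.Linear rather than the app's pipe.transformer:

import torch
import torch.nn as nn

device = "cuda"
model = nn.Linear(64, 64).to(device)

# Static input buffer: CUDA graphs need fixed shapes and fixed addresses.
static_in = torch.randn(8, 64, device=device)

# Warmup on a side stream (the recipe from the torch.cuda.graphs docs).
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    for _ in range(3):
        model(static_in)
torch.cuda.current_stream().wait_stream(s)

# Capture: kernels launched inside this context are recorded, not executed.
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    static_out = model(static_in)

def run(x):
    # Replay overwrites static_out in place, so copy new inputs in first.
    static_in.copy_(x)
    g.replay()
    return static_out

print(run(torch.randn(8, 64, device=device)).shape)

Replay skips per-kernel launch overhead but pins the whole computation to one set of shapes, which is presumably why it gives way here to shape hints computed per request.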
 
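The replacement derives the text-embedding shapes from the encoder config instead of hard-coding (2, 77, 768). A sketch of the same computation with a stand-in config object; the 77/768 values are assumptions chosen only to mirror the dims the old code hard-coded:

from types import SimpleNamespace

# Stand-in for pipe.transformer.text_encoder.config; 77 and 768 are
# assumed here only to match the (2, 77, 768) the old code hard-coded.
config = SimpleNamespace(max_position_embeddings=77, hidden_size=768)

height, width = 1024, 1024
latents_shape = (1, 4, height // 8, width // 8)  # VAE downsamples by 8x
prompt_embeds_shape = (1, config.max_position_embeddings, config.hidden_size)
pooled_prompt_embeds_shape = (1, config.hidden_size)

print(latents_shape)               # (1, 4, 128, 128)
print(prompt_embeds_shape)         # (1, 77, 768)
print(pooled_prompt_embeds_shape)  # (1, 768)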