Jordan Legg committed on
Commit 044186b · 1 Parent(s): b11c213

fix: using the VAE directly

Files changed (1): app.py (+25 -5)
app.py CHANGED
@@ -3,6 +3,8 @@ import numpy as np
 import random
 import spaces
 import torch
+from PIL import Image
+from torchvision import transforms
 from diffusers import DiffusionPipeline
 
 # Define constants
@@ -14,6 +16,22 @@ MAX_IMAGE_SIZE = 2048
 # Load the diffusion pipeline
 pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype).to(device)
 
+def preprocess_image(image):
+    # Preprocess the image for the VAE
+    preprocess = transforms.Compose([
+        transforms.Resize((512, 512)),  # Adjust the size as needed
+        transforms.ToTensor(),
+        transforms.Normalize([0.5], [0.5])
+    ])
+    image = preprocess(image).unsqueeze(0).to(device)
+    return image
+
+def encode_image(image, vae):
+    # Encode the image using the VAE
+    with torch.no_grad():
+        latents = vae.encode(image).latent_dist.sample() * 0.18215
+    return latents
+
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
@@ -23,22 +41,23 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
     if init_image is not None:
         # Process img2img
         init_image = init_image.convert("RGB")
-        init_image = pipe.preprocess(init_image).unsqueeze(0).to(device, dtype)
+        init_image = preprocess_image(init_image)
+        latents = encode_image(init_image, pipe.vae)
         image = pipe(
             prompt=prompt,
-            init_image=init_image,
-            width=width,
             height=height,
+            width=width,
             num_inference_steps=num_inference_steps,
             generator=generator,
-            guidance_scale=0.0
+            guidance_scale=0.0,
+            latents=latents
         ).images[0]
     else:
         # Process text2img
         image = pipe(
             prompt=prompt,
-            width=width,
             height=height,
+            width=width,
             num_inference_steps=num_inference_steps,
             generator=generator,
             guidance_scale=0.0
@@ -164,3 +183,4 @@
 
 demo.launch()
 
+
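
For context, below is a minimal standalone sketch of the encode path this commit introduces: preprocess a PIL image and push it through the pipeline's VAE to obtain latents. The device, dtype, and test image path are illustrative assumptions, not part of the commit. One caveat worth flagging: the hard-coded 0.18215 in encode_image is the Stable Diffusion 1.x VAE scaling factor, whereas the FLUX VAE publishes its own scaling_factor (and a shift_factor) in vae.config, so the sketch reads the factors from the config instead.

import torch
from PIL import Image
from torchvision import transforms
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16  # dtype assumed
).to(device)

# Same preprocessing as the commit's preprocess_image helper.
preprocess = transforms.Compose([
    transforms.Resize((512, 512)),       # fixed size, as in the commit
    transforms.ToTensor(),               # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize([0.5], [0.5]),  # rescale to [-1, 1], the VAE's input range
])

init_image = Image.open("example.jpg").convert("RGB")  # hypothetical test image
pixels = preprocess(init_image).unsqueeze(0).to(device, dtype=pipe.vae.dtype)

with torch.no_grad():
    # Encode to the latent distribution and draw a sample, as encode_image does.
    latents = pipe.vae.encode(pixels).latent_dist.sample()
    # Read the normalization constants from the VAE config rather than
    # hard-coding 0.18215; shift_factor may be None or absent on older VAEs.
    shift = getattr(pipe.vae.config, "shift_factor", 0.0) or 0.0
    latents = (latents - shift) * pipe.vae.config.scaling_factor

print(latents.shape)  # e.g. (1, 16, 64, 64) for FLUX's 16-channel VAE at 512x512

Whether the pipeline will accept these raw VAE latents through its latents= argument as the commit does is worth verifying: the diffusers text-to-image FLUX pipeline generally expects latents in its own packed layout, so this call path may need an explicit packing step to behave as true img2img.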