[Minor] Use a generator function to generate a list
app.py CHANGED
@@ -273,7 +273,6 @@ def generate(
                 m_img.astype('float') / 2.0 * red).astype('uint8'))
 
 
-
     mask_video_path = "mask.mp4"
     fps = 30
     with imageio.get_writer(mask_video_path, fps=fps) as video:
@@ -282,7 +281,45 @@ def generate(
 
     return [int(seed), text_cfg_scale, image_cfg_scale, edited_image, mix_image, edited_mask_copy, mask_video_path, image_video_path, input_image_copy, mix_result_with_red_mask]
 
-
+
+def single_generation(model_wrap_cfg, input_image_copy, instruction, steps, seed, text_cfg_scale, image_cfg_scale, height, width):
+    model.cuda()
+    with torch.no_grad(), autocast("cuda"), model.ema_scope():
+        cond = {}
+        input_image_torch = 2 * torch.tensor(np.array(input_image_copy.to(model.device))).float() / 255 - 1
+        input_image_torch = rearrange(input_image_torch, "h w c -> 1 c h w").to(model.device)
+        cond["c_crossattn"] = [model.get_learned_conditioning([instruction]).to(model.device)]
+        cond["c_concat"] = [model.encode_first_stage(input_image_torch).mode().to(model.device)]
+
+        uncond = {}
+        uncond["c_crossattn"] = [null_token.to(model.device)]
+        uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])]
+
+        sigmas = model_wrap.get_sigmas(steps).to(model.device)
+
+        extra_args = {
+            "cond": cond,
+            "uncond": uncond,
+            "text_cfg_scale": text_cfg_scale,
+            "image_cfg_scale": image_cfg_scale,
+        }
+        torch.manual_seed(seed)
+        z_0 = torch.randn_like(cond["c_concat"][0]).to(model.device) * sigmas[0]
+        z_1 = torch.randn_like(cond["c_concat"][0]).to(model.device) * sigmas[0]
+
+        z_0, z_1, _, _ = sample_euler_ancestral(model_wrap_cfg, z_0, z_1, sigmas, height, width, extra_args=extra_args)
+
+        x_0 = model.decode_first_stage(z_0)
+
+        x_1 = nn.functional.interpolate(z_1, size=(height, width), mode="bilinear", align_corners=False)
+        x_1 = torch.where(x_1 > 0, 1, -1) # Thresholding step
+
+        x_1_mean = torch.sum(x_1).item()/x_1.numel()
+
+        return x_0, x_1, x_1_mean
+
+
+@spaces.GPU(duration=150)
 def generate_list(
     input_image: Image.Image,
     generate_list: str,
@@ -322,9 +359,11 @@ def generate_list(
     while generate_index < len(generate_list):
         print(f'generate_index: {str(generate_index)}')
         instruction = generate_list[generate_index]
+
+        # x_0, x_1, x_1_mean = single_generation(model_wrap_cfg, input_image_copy, instruction, steps, seed, text_cfg_scale, image_cfg_scale, height, width)
         with torch.no_grad(), autocast("cuda"), model.ema_scope():
             cond = {}
-            input_image_torch = 2 * torch.tensor(np.array(input_image_copy.to(model.device))).float() / 255 - 1
+            input_image_torch = 2 * torch.tensor(np.array(input_image_copy)).float() / 255 - 1
             input_image_torch = rearrange(input_image_torch, "h w c -> 1 c h w").to(model.device)
             cond["c_crossattn"] = [model.get_learned_conditioning([instruction]).to(model.device)]
             cond["c_concat"] = [model.encode_first_stage(input_image_torch).mode().to(model.device)]
@@ -351,8 +390,10 @@ def generate_list(
 
             x_1 = nn.functional.interpolate(z_1, size=(height, width), mode="bilinear", align_corners=False)
             x_1 = torch.where(x_1 > 0, 1, -1) # Thresholding step
+
+            x_1_mean = torch.sum(x_1).item()/x_1.numel()
 
-            if torch.sum(x_1).item()/x_1.numel() < -0.99:
+            if x_1_mean < -0.99:
                 seed += 1
                 retry_number +=1
                 if retry_number > max_retry:
@@ -384,20 +425,22 @@ def generate_list(
 
         image_video.append((mix_image_np * 255).astype(np.uint8))
         mix_image = Image.fromarray((mix_image_np * 255).astype(np.uint8)).convert('RGB')
-
-
-
-
-
+
+        mix_result_with_red_mask = None
+        mask_video_path = None
+        image_video_path = None
+        edited_mask_copy = None
+
+        if generate_index == len(generate_list):
+            image_video_path = "image.mp4"
+            fps = 2
+            with imageio.get_writer(image_video_path, fps=fps) as video:
+                for image in image_video:
+                    video.append_data(image)
 
-
-    fps = 2
-    with imageio.get_writer(image_video_path, fps=fps) as video:
-        for image in image_video:
-            video.append_data(image)
+        yield [int(seed), text_cfg_scale, image_cfg_scale, edited_image, mix_image, edited_mask_copy, mask_video_path, image_video_path, input_image, mix_result_with_red_mask]
 
-
-    return [int(seed), text_cfg_scale, image_cfg_scale, edited_image, mix_image, edited_mask_copy, mask_video_path, image_video_path, input_image, mix_result_with_red_mask]
+        input_image_copy = mix_image
 
 
 def reset():
@@ -553,4 +596,5 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
 # demo.launch(share=True)
 
 
+# demo.queue().launch(enable_queue=True)
 demo.queue().launch()
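The heart of this commit is the switch in generate_list from a single final return to a yield inside the loop, which turns the handler into a generator so Gradio can stream each intermediate edit to the UI (generator handlers require the queue, which the app already enables via demo.queue()). Below is a minimal, self-contained sketch of that pattern; the handler and component names are illustrative stand-ins, not the app's actual code.

```python
import time

import gradio as gr


def run_instructions(instructions: str):
    """Toy stand-in for generate_list: one 'edit' per line of input."""
    done = []
    for instruction in instructions.splitlines():
        time.sleep(0.5)           # placeholder for one diffusion edit
        done.append(instruction)  # placeholder for the edited image
        # Yielding here (instead of returning once after the loop) makes this
        # handler a generator; Gradio pushes every yielded value to the outputs.
        yield "\n".join(done)


with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Instructions (one per line)")
    progress = gr.Textbox(label="Progress")
    prompt.submit(run_instructions, inputs=prompt, outputs=progress)

# Generator handlers need the queue enabled, as the app already does.
demo.queue().launch()
```

Note that every yield must supply a value for each declared output, which is why the diff fills the outputs that are only ready on the last iteration (mix_result_with_red_mask, mask_video_path, image_video_path, edited_mask_copy) with None until the final video is written.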