Spaces: Runtime error
remove inits
app.py
CHANGED
@@ -116,28 +116,28 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
     clip_size = clip_model.visual.input_resolution
     normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                      std=[0.26862954, 0.26130258, 0.27577711])
-
+
 
     #def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt):
     all_frames = []
     prompts = [text]
-    if image_prompts:
-        image_prompts = [image_prompts.name]
-    else:
-        image_prompts = []
+    # if image_prompts:
+    # image_prompts = [image_prompts.name]
+    # else:
+    # image_prompts = []
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.
     range_scale = range_scale # Controls how far out of range RGB values are allowed to be.
     cutn = cutn
     n_batches = 1
-    if init_image:
-        init_image = init_image.name
-    else:
-        init_image = None # This can be an URL or Colab local path and must be in quotes.
+    # if init_image:
+    # init_image = init_image.name
+    # else:
+    # init_image = None # This can be an URL or Colab local path and must be in quotes.
     skip_timesteps = skip_timesteps # This needs to be between approx. 200 and 500 when using an init image.
     # Higher values make the output look more like the init.
-    init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
+    # init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
     seed = seed
 
     if seed is not None:
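For context, the branches commented out above were only normalizing the Gradio inputs before sampling: an uploaded file was turned into a plain path, and a missing upload into an empty list or None. A minimal sketch of that pattern, assuming (as the commented code does) that Gradio passes init_image and image_prompts as file objects exposing a .name path; the helper name is hypothetical:

# Hypothetical helper mirroring the branches this commit comments out.
# Assumes uploads arrive as Gradio file objects with a .name path attribute,
# which is what the original image_prompts.name / init_image.name lines relied on.
def normalize_inputs(init_image, image_prompts):
    image_prompt_list = [image_prompts.name] if image_prompts else []
    init_path = init_image.name if init_image else None  # None disables the init image
    return init_path, image_prompt_list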
@@ -149,24 +149,25 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
         txt, weight = parse_prompt(prompt)
         target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
         weights.append(weight)
-    for prompt in image_prompts:
-        path, weight = parse_prompt(prompt)
-        img = Image.open(fetch(path)).convert('RGB')
-        img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
-        batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
-        embed = clip_model.encode_image(normalize(batch)).float()
-        target_embeds.append(embed)
-        weights.extend([weight / cutn] * cutn)
+    # for prompt in image_prompts:
+    # path, weight = parse_prompt(prompt)
+    # img = Image.open(fetch(path)).convert('RGB')
+    # img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
+    # batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
+    # embed = clip_model.encode_image(normalize(batch)).float()
+    # target_embeds.append(embed)
+    # weights.extend([weight / cutn] * cutn)
     target_embeds = torch.cat(target_embeds)
     weights = torch.tensor(weights, device=device)
     if weights.sum().abs() < 1e-3:
         raise RuntimeError('The weights must not sum to 0.')
     weights /= weights.sum().abs()
     init = None
-    if init_image is not None:
-        init = Image.open(fetch(init_image)).convert('RGB')
-        init = init.resize((side_x, side_y), Image.LANCZOS)
-        init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
+    # if init_image is not None:
+    # lpips_model = lpips.LPIPS(net='vgg').to(device)
+    # init = Image.open(fetch(init_image)).convert('RGB')
+    # init = init.resize((side_x, side_y), Image.LANCZOS)
+    # init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
     cur_t = None
     def cond_fn(x, t, y=None):
         with torch.enable_grad():
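The block commented out above is the standard CLIP image-prompt path: fetch the image, resize it to the model's working size, take cutn augmented cutouts, encode them with CLIP, and spread the prompt weight evenly over the cutouts so image prompts mix with text prompts in target_embeds and weights. A sketch of those steps with the intent spelled out in comments; fetch, make_cutouts, side_x, side_y and cutn are assumed to be defined elsewhere in app.py, exactly as the commented code assumes:

# Sketch of the commented-out image-prompt path (helper names taken from the commented code above).
for prompt in image_prompts:
    path, weight = parse_prompt(prompt)                      # split an optional ":weight" suffix
    img = Image.open(fetch(path)).convert('RGB')             # fetch resolves URLs or local paths
    img = TF.resize(img, min(side_x, side_y, *img.size),
                    transforms.InterpolationMode.LANCZOS)    # keep it within the model resolution
    batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))   # cutn random crops
    embed = clip_model.encode_image(normalize(batch)).float()
    target_embeds.append(embed)
    weights.extend([weight / cutn] * cutn)                   # each cutout carries an equal share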
@@ -184,9 +185,10 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
             tv_losses = tv_loss(x_in)
             range_losses = range_loss(out['pred_xstart'])
             loss = losses.sum() * clip_guidance_scale + tv_losses.sum() * tv_scale + range_losses.sum() * range_scale
-            if init is not None and init_scale:
-                init_losses = lpips_model(x_in, init)
-                loss = loss + init_losses.sum() * init_scale
+            # if init is not None and init_scale:
+
+            # init_losses = lpips_model(x_in, init)
+            # loss = loss + init_losses.sum() * init_scale
             return -torch.autograd.grad(loss, x)[0]
     if model_config['timestep_respacing'].startswith('ddim'):
         sample_fn = diffusion.ddim_sample_loop_progressive
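cond_fn is where the guidance happens: at every step it differentiates a weighted sum of losses with respect to the current sample and returns the negative gradient, which the sampler uses to shift the predicted mean, so the sample is pulled toward lower loss. The lines commented out above drop the optional LPIPS term that anchors the result to the init image. A sketch of what re-enabling it would look like, assuming lpips_model is created as in the commented line of the previous hunk and init is the [-1, 1] init tensor:

# Inside cond_fn, after loss has been assembled from the CLIP, TV and range terms.
# lpips_model and init are the (currently commented-out) objects from earlier in inference().
if init is not None and init_scale:
    init_losses = lpips_model(x_in, init)           # perceptual distance to the init image
    loss = loss + init_losses.sum() * init_scale    # the original comment suggests init_scale around 1000
return -torch.autograd.grad(loss, x)[0]             # negative gradient = direction of decreasing loss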
@@ -210,11 +212,9 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
             if j % 1 == 0 or cur_t == -1:
                 print()
                 for k, image in enumerate(sample['pred_xstart']):
-                    #filename = f'progress_{i * batch_size + k:05}.png'
                     img = TF.to_pil_image(image.add(1).div(2).clamp(0, 1))
                     all_frames.append(img)
                     tqdm.write(f'Batch {i}, step {j}, output {k}:')
-                    #display.display(display.Image(filename))
     writer = imageio.get_writer('video.mp4', fps=5)
     for im in all_frames:
         writer.append_data(np.array(im))
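The sampling loop above maps every intermediate pred_xstart from the model's [-1, 1] range back to [0, 1] via add(1).div(2).clamp(0, 1), turns it into a PIL frame, and finally streams all frames into video.mp4 at 5 fps. A self-contained sketch of that output path with dummy frames (the random tensors stand in for real samples; note that the writer needs to be closed to flush the file, which is not visible in the lines shown here):

import numpy as np
import imageio
import torch
import torchvision.transforms.functional as TF

frames = []
for _ in range(3):
    x = torch.rand(3, 256, 256) * 2 - 1                          # dummy sample in [-1, 1]
    frames.append(TF.to_pil_image(x.add(1).div(2).clamp(0, 1)))  # back to [0, 1], then PIL

writer = imageio.get_writer('video.mp4', fps=5)                  # needs the imageio-ffmpeg backend
for im in frames:
    writer.append_data(np.array(im))
writer.close()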
@@ -225,7 +225,7 @@ demo = gr.Blocks()
 with demo:
     gr.Markdown(
         """
-        # CLIP Guided Diffusion Faces Model
+        # CLIP Guided Openai Diffusion Faces Model
         ### by [Alex Spirin](https://linktr.ee/devdef)
         Gradio Blocks demo for CLIP Guided Diffusion. To use it, simply add your text, or click one of the examples to load them.
         Based on the original [Space](https://huggingface.co/spaces/EleutherAI/clip-guided-diffusion) by akhaliq.
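The Markdown in the last hunk is just the header of the gr.Blocks UI; the wiring of the input widgets to inference is outside this diff. For orientation, a hypothetical minimal skeleton of how such a Blocks demo is typically assembled (component names and the stub below are illustrative, not taken from app.py):

import gradio as gr

def inference_stub(text):
    # Stand-in for the real inference(); the actual app returns the mp4
    # written by the sampling loop ('video.mp4').
    return 'video.mp4'

demo = gr.Blocks()
with demo:
    gr.Markdown('# CLIP Guided Openai Diffusion Faces Model')
    prompt = gr.Textbox(label='Prompt')
    video = gr.Video(label='Output')
    run = gr.Button('Generate')
    run.click(fn=inference_stub, inputs=prompt, outputs=video)

demo.launch()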