fix image inputs
app.py CHANGED
@@ -80,7 +80,7 @@ def tv_loss(input):
 def range_loss(input):
     return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])
 
-def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn):
+def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn, im_prompt_weight):
     # Model settings
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
     model_config = model_and_diffusion_defaults()
@@ -121,20 +121,14 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
 
     all_frames = []
     prompts = [text]
-    if image_prompts:
-        image_prompts = [image_prompts.name]
-    else:
-        image_prompts = []
+
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.
     range_scale = range_scale # Controls how far out of range RGB values are allowed to be.
     cutn = cutn
     n_batches = 1
-    if init_image:
-        init_image = init_image.name
-    else:
-        init_image = None # This can be an URL or Colab local path and must be in quotes.
+
     skip_timesteps = skip_timesteps # This needs to be between approx. 200 and 500 when using an init image.
     # Higher values make the output look more like the init.
     init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
@@ -149,14 +143,13 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
         txt, weight = parse_prompt(prompt)
         target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
         weights.append(weight)
-    for prompt in image_prompts:
-        path, weight = parse_prompt(prompt)
-        img = Image.open(fetch(path)).convert('RGB')
+    if image_prompts is not None:
+        img = Image.fromarray(image_prompts).convert('RGB')
         img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
         batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
         embed = clip_model.encode_image(normalize(batch)).float()
         target_embeds.append(embed)
-        weights.extend([weight / cutn] * cutn)
+        weights.extend([im_prompt_weight / cutn] * cutn)
     target_embeds = torch.cat(target_embeds)
     weights = torch.tensor(weights, device=device)
     if weights.sum().abs() < 1e-3:
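This hunk is the core of the "fix image inputs" change for the image prompt: the removed lines (reconstructed above) treated image_prompts as a list of file paths fed to Image.open(fetch(path)), but gr.Image with its default type="numpy" hands the callback a numpy array, or None when nothing is uploaded, so the new code converts it with Image.fromarray and spreads the im_prompt_weight slider value evenly over the cutn cutouts. Below is a minimal, runnable sketch of just that conversion and weighting; the helper name image_prompt_weights and the dummy array are illustrative, not part of app.py.

# Sketch, assuming Gradio's gr.Image (default type="numpy") delivers an
# H x W x 3 uint8 array, or None when no image prompt is uploaded.
import numpy as np
from PIL import Image

def image_prompt_weights(image_prompts, im_prompt_weight, cutn):
    """Convert the uploaded array to a PIL image and build per-cutout weights."""
    if image_prompts is None:
        return None, []
    img = Image.fromarray(image_prompts).convert('RGB')  # numpy array -> PIL image
    # Each of the cutn cutouts of this image prompt gets an equal share of the
    # slider weight, matching weights.extend([im_prompt_weight / cutn] * cutn).
    weights = [im_prompt_weight / cutn] * cutn
    return img, weights

# Example: a dummy 64x64 upload, weight 2 spread over 4 cutouts.
dummy = np.zeros((64, 64, 3), dtype=np.uint8)
img, w = image_prompt_weights(dummy, im_prompt_weight=2, cutn=4)
print(img.size, w)  # (64, 64) [0.5, 0.5, 0.5, 0.5]

In app.py the PIL image then goes through make_cutouts and CLIP's image encoder to produce the target embeddings; the sketch stops at the conversion because that is the part this commit fixes.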
@@ -165,7 +158,7 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
     init = None
     if init_image is not None:
         lpips_model = lpips.LPIPS(net='vgg').to(device)
-        init = Image.open(fetch(init_image)).convert('RGB')
+        init = Image.fromarray(init_image).convert('RGB')
         init = init.resize((side_x, side_y), Image.LANCZOS)
         init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
     cur_t = None
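The same fix applies to the init image: the removed line (reconstructed above as Image.open(fetch(init_image))) expected a URL or local path, whereas the upload widget now supplies an array directly. A short sketch of the resulting preprocessing, using a hypothetical prepare_init helper and assuming side_x = side_y = 256; the mul(2).sub(1) step rescales pixel values to the [-1, 1] range guided-diffusion models expect.

# Sketch of the init-image path after the fix; prepare_init and the default
# 256x256 side lengths are assumptions for illustration, not app.py code.
import numpy as np
import torchvision.transforms.functional as TF
from PIL import Image

def prepare_init(init_image, side_x=256, side_y=256, device='cpu'):
    if init_image is None:
        return None
    init = Image.fromarray(init_image).convert('RGB')    # array from the UI -> PIL
    init = init.resize((side_x, side_y), Image.LANCZOS)  # match the model resolution
    # to_tensor yields values in [0, 1]; shift them to [-1, 1] for the diffusion model.
    return TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)

init = prepare_init(np.full((128, 96, 3), 127, dtype=np.uint8))
print(init.shape, float(init.min()))  # torch.Size([1, 3, 256, 256]) approx. -0.004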
@@ -253,10 +246,11 @@ with demo:
         with gr.Column():
             init_image = gr.Image(source="upload", label='initial image (optional)')
             init_scale = gr.Slider(minimum=0, maximum=45, step=1, value=10, label="Look like the image above")
+            skip_timesteps = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Style strength")
         # with gr.Group():
         with gr.Column():
             image_prompts = gr.Image(source="upload", label='image prompt (optional)')
-
+            im_prompt_weight = gr.Slider(minimum=0, maximum=10, step=1, value=1, label="Look like the image above")
 
         with gr.Group():
             with gr.Row():
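The new sliders only matter because their values reach inference through run_button.click, which the final hunk below updates. Here is a stripped-down wiring sketch in the Gradio 3.x style this Space uses (gr.Image(source="upload") returning a numpy array; newer Gradio versions changed the Image source arguments). preview is a stand-in for the real inference function.

# Minimal Blocks wiring sketch (Gradio 3.x API assumed); preview() is a
# placeholder callback, not the Space's inference().
import gradio as gr

def preview(image_prompts, im_prompt_weight, skip_timesteps):
    if image_prompts is None:
        return "no image prompt"
    return f"image prompt {image_prompts.shape}, weight={im_prompt_weight}, skip={skip_timesteps}"

with gr.Blocks() as demo:
    with gr.Column():
        image_prompts = gr.Image(source="upload", label='image prompt (optional)')
        im_prompt_weight = gr.Slider(minimum=0, maximum=10, step=1, value=1, label="Look like the image above")
        skip_timesteps = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Style strength")
    out = gr.Textbox()
    run_button = gr.Button("Run")
    # The callback receives plain Python values: a numpy array for the image and
    # numbers for the sliders, in the same order as the inputs list.
    run_button.click(preview, inputs=[image_prompts, im_prompt_weight, skip_timesteps], outputs=out)

if __name__ == "__main__":
    demo.launch()

Because Gradio matches inputs positionally, appending im_prompt_weight to inputs= requires appending it to the inference signature as well, which is exactly what the first and last hunks of this commit do.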
@@ -279,6 +273,6 @@ with demo:
 
     outputs=[output_image,output_video]
 
-    run_button.click(inference, inputs=[text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn], outputs=outputs)
+    run_button.click(inference, inputs=[text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn, im_prompt_weight], outputs=outputs)
 
 demo.launch(enable_queue=True)