fix image inputs
app.py (CHANGED)
@@ -80,7 +80,7 @@ def tv_loss(input):
 def range_loss(input):
     return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])
 
-def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn):
+def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn, im_prompt_weight):
     # Model settings
     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
     model_config = model_and_diffusion_defaults()
@@ -121,20 +121,14 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
 
     all_frames = []
     prompts = [text]
-    if image_prompts is not None:
-        image_prompts = [image_prompts.name]
-    else:
-        image_prompts = []
+
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.
     range_scale = range_scale # Controls how far out of range RGB values are allowed to be.
     cutn = cutn
     n_batches = 1
-    if init_image is not None:
-        init_image = init_image.name
-    else:
-        init_image = None # This can be an URL or Colab local path and must be in quotes.
+
     skip_timesteps = skip_timesteps # This needs to be between approx. 200 and 500 when using an init image.
     # Higher values make the output look more like the init.
     init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
@@ -149,14 +143,13 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
         txt, weight = parse_prompt(prompt)
         target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
         weights.append(weight)
-    for prompt in image_prompts:
-        path, weight = parse_prompt(prompt)
-        img = Image.open(fetch(path)).convert('RGB')
+    if image_prompts is not None:
+        img = Image.fromarray(image_prompts).convert('RGB')
         img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
         batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
         embed = clip_model.encode_image(normalize(batch)).float()
         target_embeds.append(embed)
-        weights.extend([weight / cutn] * cutn)
+        weights.extend([im_prompt_weight / cutn] * cutn)
     target_embeds = torch.cat(target_embeds)
     weights = torch.tensor(weights, device=device)
     if weights.sum().abs() < 1e-3:
@@ -165,7 +158,7 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
     init = None
     if init_image is not None:
         lpips_model = lpips.LPIPS(net='vgg').to(device)
-        init = Image.open(fetch(init_image)).convert('RGB')
+        init = Image.fromarray(init_image).convert('RGB')
         init = init.resize((side_x, side_y), Image.LANCZOS)
         init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
     cur_t = None
@@ -253,10 +246,11 @@ with demo:
         with gr.Column():
             init_image = gr.Image(source="upload", label='initial image (optional)')
             init_scale = gr.Slider(minimum=0, maximum=45, step=1, value=10, label="Look like the image above")
+            skip_timesteps = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Style strength")
         # with gr.Group():
         with gr.Column():
             image_prompts = gr.Image(source="upload", label='image prompt (optional)')
-
+            im_prompt_weight = gr.Slider(minimum=0, maximum=10, step=1, value=1, label="Look like the image above")
 
     with gr.Group():
         with gr.Row():
@@ -279,6 +273,6 @@ with demo:
 
     outputs=[output_image,output_video]
 
-    run_button.click(inference, inputs=[text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn], outputs=outputs)
+    run_button.click(inference, inputs=[text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompts,timestep_respacing, cutn, im_prompt_weight], outputs=outputs)
 
 demo.launch(enable_queue=True)
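What "fix image inputs" amounts to: the Gradio gr.Image(source="upload") components hand the upload to inference() as a numpy array (Gradio's default type="numpy"), not as a temporary-file object, so the old image_prompts.name / init_image.name and Image.open(fetch(...)) handling no longer worked; the new code converts the array with PIL.Image.fromarray instead. A minimal sketch of that conversion, assuming an H x W x 3 uint8 array as Gradio provides it — prepare_gradio_image is a hypothetical helper, and side_x/side_y stand in for the app's working resolution:

import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF

def prepare_gradio_image(arr: np.ndarray, side_x: int, side_y: int):
    # Gradio's gr.Image passes the upload as a numpy array (H x W x 3, uint8),
    # so Image.fromarray replaces the old Image.open(fetch(path)) route.
    img = Image.fromarray(arr).convert('RGB')
    # Shrink the shorter side to the working resolution, as the app does.
    img = TF.resize(img, min(side_x, side_y, *img.size),
                    transforms.InterpolationMode.LANCZOS)
    # 1 x 3 x H x W tensor, ready for make_cutouts / CLIP encoding.
    return TF.to_tensor(img).unsqueeze(0)

Gradio can also be asked to deliver a PIL image directly (gr.Image(type="pil")); the diff instead keeps the default numpy payload and converts inside inference().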
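The other half of the change exposes an image-prompt weight in the UI (the new im_prompt_weight slider) and spreads it over the cutn cutouts taken from the image prompt, so the image prompt as a whole contributes im_prompt_weight to the CLIP guidance alongside the text prompt's parsed weight. A toy sketch of that bookkeeping — the helper name and numbers are illustrative, not part of the app:

import torch

def combined_prompt_weights(text_weight: float, im_prompt_weight: float, cutn: int):
    # One weight entry per text prompt, plus cutn entries for the image prompt,
    # mirroring weights.append(weight) / weights.extend([im_prompt_weight / cutn] * cutn).
    weights = [text_weight]
    weights.extend([im_prompt_weight / cutn] * cutn)
    w = torch.tensor(weights)
    if w.sum().abs() < 1e-3:
        # the app guards against an (almost) zero total weight at this point
        raise ValueError('prompt weights must not sum to zero')
    return w

# e.g. default sliders: text weight 1.0, im_prompt_weight 1, cutn 16
# -> a length-17 tensor whose image-prompt entries each carry 1/16
print(combined_prompt_weights(1.0, 1, 16))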