Spaces: Runtime error
remove inits
app.py
CHANGED
@@ -116,28 +116,28 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
     clip_size = clip_model.visual.input_resolution
     normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                      std=[0.26862954, 0.26130258, 0.27577711])
-
+
 
     #def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt):
     all_frames = []
     prompts = [text]
-    if image_prompts:
-        image_prompts = [image_prompts.name]
-    else:
-        image_prompts = []
+    # if image_prompts:
+    # image_prompts = [image_prompts.name]
+    # else:
+    # image_prompts = []
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.
     range_scale = range_scale # Controls how far out of range RGB values are allowed to be.
     cutn = cutn
     n_batches = 1
-    if init_image:
-        init_image = init_image.name
-    else:
-        init_image = None # This can be an URL or Colab local path and must be in quotes.
+    # if init_image:
+    # init_image = init_image.name
+    # else:
+    # init_image = None # This can be an URL or Colab local path and must be in quotes.
     skip_timesteps = skip_timesteps # This needs to be between approx. 200 and 500 when using an init image.
     # Higher values make the output look more like the init.
-    init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
+    # init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
     seed = seed
 
     if seed is not None:
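For context, the branches commented out above were only normalizing the Gradio inputs before sampling: an uploaded file was turned into a plain path, and a missing upload into an empty list or None. A minimal sketch of that pattern, assuming (as the commented code does) that Gradio passes init_image and image_prompts as file objects exposing a .name path; the helper name is hypothetical:

# Hypothetical helper mirroring the branches this commit comments out.
# Assumes uploads arrive as Gradio file objects with a .name path attribute,
# which is what the original image_prompts.name / init_image.name lines relied on.
def normalize_inputs(init_image, image_prompts):
    image_prompt_list = [image_prompts.name] if image_prompts else []
    init_path = init_image.name if init_image else None  # None disables the init image
    return init_path, image_prompt_list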
@@ -149,24 +149,25 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
         txt, weight = parse_prompt(prompt)
         target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
         weights.append(weight)
-    for prompt in image_prompts:
-        path, weight = parse_prompt(prompt)
-        img = Image.open(fetch(path)).convert('RGB')
-        img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
-        batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
-        embed = clip_model.encode_image(normalize(batch)).float()
-        target_embeds.append(embed)
-        weights.extend([weight / cutn] * cutn)
+    # for prompt in image_prompts:
+    # path, weight = parse_prompt(prompt)
+    # img = Image.open(fetch(path)).convert('RGB')
+    # img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
+    # batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
+    # embed = clip_model.encode_image(normalize(batch)).float()
+    # target_embeds.append(embed)
+    # weights.extend([weight / cutn] * cutn)
     target_embeds = torch.cat(target_embeds)
     weights = torch.tensor(weights, device=device)
     if weights.sum().abs() < 1e-3:
         raise RuntimeError('The weights must not sum to 0.')
     weights /= weights.sum().abs()
     init = None
-    if init_image is not None:
-        init = Image.open(fetch(init_image)).convert('RGB')
-        init = init.resize((side_x, side_y), Image.LANCZOS)
-        init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
+    # if init_image is not None:
+    # lpips_model = lpips.LPIPS(net='vgg').to(device)
+    # init = Image.open(fetch(init_image)).convert('RGB')
+    # init = init.resize((side_x, side_y), Image.LANCZOS)
+    # init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
     cur_t = None
     def cond_fn(x, t, y=None):
         with torch.enable_grad():
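The block commented out above is the standard CLIP image-prompt path: fetch the image, resize it to the model's working size, take cutn augmented cutouts, encode them with CLIP, and spread the prompt weight evenly over the cutouts so image prompts mix with text prompts in target_embeds and weights. A sketch of those steps with the intent spelled out in comments; fetch, make_cutouts, side_x, side_y and cutn are assumed to be defined elsewhere in app.py, exactly as the commented code assumes:

# Sketch of the commented-out image-prompt path (helper names taken from the commented code above).
for prompt in image_prompts:
    path, weight = parse_prompt(prompt)                      # split an optional ":weight" suffix
    img = Image.open(fetch(path)).convert('RGB')             # fetch resolves URLs or local paths
    img = TF.resize(img, min(side_x, side_y, *img.size),
                    transforms.InterpolationMode.LANCZOS)    # keep it within the model resolution
    batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))   # cutn random crops
    embed = clip_model.encode_image(normalize(batch)).float()
    target_embeds.append(embed)
    weights.extend([weight / cutn] * cutn)                   # each cutout carries an equal share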
@@ -184,9 +185,10 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
             tv_losses = tv_loss(x_in)
             range_losses = range_loss(out['pred_xstart'])
             loss = losses.sum() * clip_guidance_scale + tv_losses.sum() * tv_scale + range_losses.sum() * range_scale
-            if init is not None and init_scale:
-                init_losses = lpips_model(x_in, init)
-                loss = loss + init_losses.sum() * init_scale
+            # if init is not None and init_scale:
+
+            # init_losses = lpips_model(x_in, init)
+            # loss = loss + init_losses.sum() * init_scale
             return -torch.autograd.grad(loss, x)[0]
     if model_config['timestep_respacing'].startswith('ddim'):
         sample_fn = diffusion.ddim_sample_loop_progressive
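cond_fn is where the guidance happens: at every step it differentiates a weighted sum of losses with respect to the current sample and returns the negative gradient, which the sampler uses to shift the predicted mean, so the sample is pulled toward lower loss. The lines commented out above drop the optional LPIPS term that anchors the result to the init image. A sketch of what re-enabling it would look like, assuming lpips_model is created as in the commented line of the previous hunk and init is the [-1, 1] init tensor:

# Inside cond_fn, after loss has been assembled from the CLIP, TV and range terms.
# lpips_model and init are the (currently commented-out) objects from earlier in inference().
if init is not None and init_scale:
    init_losses = lpips_model(x_in, init)           # perceptual distance to the init image
    loss = loss + init_losses.sum() * init_scale    # the original comment suggests init_scale around 1000
return -torch.autograd.grad(loss, x)[0]             # negative gradient = direction of decreasing loss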
@@ -210,11 +212,9 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
             if j % 1 == 0 or cur_t == -1:
                 print()
                 for k, image in enumerate(sample['pred_xstart']):
-                    #filename = f'progress_{i * batch_size + k:05}.png'
                     img = TF.to_pil_image(image.add(1).div(2).clamp(0, 1))
                     all_frames.append(img)
                     tqdm.write(f'Batch {i}, step {j}, output {k}:')
-                    #display.display(display.Image(filename))
     writer = imageio.get_writer('video.mp4', fps=5)
     for im in all_frames:
         writer.append_data(np.array(im))
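The sampling loop above maps every intermediate pred_xstart from the model's [-1, 1] range back to [0, 1] via add(1).div(2).clamp(0, 1), turns it into a PIL frame, and finally streams all frames into video.mp4 at 5 fps. A self-contained sketch of that output path with dummy frames (the random tensors stand in for real samples; note that the writer needs to be closed to flush the file, which is not visible in the lines shown here):

import numpy as np
import imageio
import torch
import torchvision.transforms.functional as TF

frames = []
for _ in range(3):
    x = torch.rand(3, 256, 256) * 2 - 1                          # dummy sample in [-1, 1]
    frames.append(TF.to_pil_image(x.add(1).div(2).clamp(0, 1)))  # back to [0, 1], then PIL

writer = imageio.get_writer('video.mp4', fps=5)                  # needs the imageio-ffmpeg backend
for im in frames:
    writer.append_data(np.array(im))
writer.close()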
@@ -225,7 +225,7 @@ demo = gr.Blocks()
 with demo:
     gr.Markdown(
         """
-        # CLIP Guided Diffusion Faces Model
+        # CLIP Guided Openai Diffusion Faces Model
         ### by [Alex Spirin](https://linktr.ee/devdef)
         Gradio Blocks demo for CLIP Guided Diffusion. To use it, simply add your text, or click one of the examples to load them.
         Based on the original [Space](https://huggingface.co/spaces/EleutherAI/clip-guided-diffusion) by akhaliq.
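The Markdown in the last hunk is just the header of the gr.Blocks UI; the wiring of the input widgets to inference is outside this diff. For orientation, a hypothetical minimal skeleton of how such a Blocks demo is typically assembled (component names and the stub below are illustrative, not taken from app.py):

import gradio as gr

def inference_stub(text):
    # Stand-in for the real inference(); the actual app returns the mp4
    # written by the sampling loop ('video.mp4').
    return 'video.mp4'

demo = gr.Blocks()
with demo:
    gr.Markdown('# CLIP Guided Openai Diffusion Faces Model')
    prompt = gr.Textbox(label='Prompt')
    video = gr.Video(label='Output')
    run = gr.Button('Generate')
    run.click(fn=inference_stub, inputs=prompt, outputs=video)

demo.launch()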