Spaces:

snap-research
/

weights2weights

Running on Zero

App Files Community

amildravid4292 committed on Jul 22, 2024

Commit

f821ec0

verified ·

1 Parent(s): 7dcf34d

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -159,39 +159,39 @@ def inference(net, prompt, negative_prompt, guidance_scale, ddim_steps, seed):
     generator = torch.Generator(device=device).manual_seed(seed)
     latents = torch.randn(
-                (1, self.unet.in_channels, 512 // 8, 512 // 8),
                 generator = generator,
-                device = self.device
             ).bfloat16()
-    text_input = self.tokenizer(prompt, padding="max_length", max_length=self.tokenizer.model_max_length, truncation=True, return_tensors="pt")
-    text_embeddings = self.text_encoder(text_input.input_ids.to(device))[0]
     max_length = text_input.input_ids.shape[-1]
-    uncond_input = self.tokenizer(
                                     [negative_prompt], padding="max_length", max_length=max_length, return_tensors="pt"
                                 )
-    uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0]
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).bfloat16()
-    self.noise_scheduler.set_timesteps(ddim_steps)
     latents = latents * self.noise_scheduler.init_noise_sigma
-    for i,t in enumerate(tqdm.tqdm(self.noise_scheduler.timesteps)):
         latent_model_input = torch.cat([latents] * 2)
-        latent_model_input = self.noise_scheduler.scale_model_input(latent_model_input, timestep=t)
         with network:
-            noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings, timestep_cond= None).sample
         #guidance
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-        latents = self.noise_scheduler.step(noise_pred, t, latents).prev_sample
     latents = 1 / 0.18215 * latents
-    image = self.vae.decode(latents.float()).sample
     image = (image / 2 + 0.5).clamp(0, 1)
     image = image.detach().cpu().float().permute(0, 2, 3, 1).numpy()[0]

     generator = torch.Generator(device=device).manual_seed(seed)
     latents = torch.randn(
+                (1, unet.in_channels, 512 // 8, 512 // 8),
                 generator = generator,
+                device = device
             ).bfloat16()
+    text_input = self.tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
+    text_embeddings = text_encoder(text_input.input_ids.to(device))[0]
     max_length = text_input.input_ids.shape[-1]
+    uncond_input = tokenizer(
                                     [negative_prompt], padding="max_length", max_length=max_length, return_tensors="pt"
                                 )
+    uncond_embeddings = text_encoder(uncond_input.input_ids.to(device))[0]
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).bfloat16()
+    noise_scheduler.set_timesteps(ddim_steps)
     latents = latents * self.noise_scheduler.init_noise_sigma
+    for i,t in enumerate(tqdm.tqdm(noise_scheduler.timesteps)):
         latent_model_input = torch.cat([latents] * 2)
+        latent_model_input = noise_scheduler.scale_model_input(latent_model_input, timestep=t)
         with network:
+            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings, timestep_cond= None).sample
         #guidance
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+        latents = noise_scheduler.step(noise_pred, t, latents).prev_sample
     latents = 1 / 0.18215 * latents
+    image = vae.decode(latents.float()).sample
     image = (image / 2 + 0.5).clamp(0, 1)
     image = image.detach().cpu().float().permute(0, 2, 3, 1).numpy()[0]