seawolf2357 committed
Commit ffc99a9 · verified · Parent: 81ee7a2

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED
@@ -103,7 +103,6 @@ def generate_image(prompt: str):
 
 # @spaces.GPU(duration=300, gpu_type="l40s")
 def infer(prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, frames=64):
-
     try:
         # Generate the image
         image_path = generate_image(prompt)
@@ -127,14 +126,14 @@ def infer(prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, frames=64):
         steps = 60
 
         batch_size = 1
-        channels = model.module.model.diffusion_model.out_channels  # access via model.module when wrapped in DataParallel
+        channels = model.module.model.diffusion_model.out_channels
         h, w = resolution[0] // 8, resolution[1] // 8
         noise_shape = [batch_size, channels, frames, h, w]
 
         with torch.no_grad(), torch.cuda.amp.autocast():
             text_emb = model.module.get_learned_conditioning([prompt])
 
-            img_tensor = image.to(model.device)
+            img_tensor = image.to(torch.cuda.current_device())
             img_tensor = (img_tensor - 0.5) * 2
             image_tensor_resized = transform(img_tensor)
             videos = image_tensor_resized.unsqueeze(0)
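A note on the device changes in this hunk (and the matching fs change in the next one): they follow from the model being wrapped in nn.DataParallel. The wrapper does not proxy arbitrary attributes, so custom fields such as out_channels (or a device property) are only reachable through model.module, and input tensors belong on the wrapper's source device, which torch.cuda.current_device() returns. A minimal sketch, with a hypothetical TinyNet standing in for the real diffusion wrapper:

import torch
import torch.nn as nn

# TinyNet is a hypothetical stand-in for the diffusion wrapper in app.py.
class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.out_channels = 4
        self.proj = nn.Linear(8, self.out_channels)

    def forward(self, x):
        return self.proj(x)

if torch.cuda.is_available():
    model = nn.DataParallel(TinyNet().cuda())

    # Custom attributes live on the wrapped module, not on the wrapper:
    channels = model.module.out_channels   # OK
    # model.out_channels                   # AttributeError on DataParallel

    # Inputs belong on DataParallel's source device (GPU 0 by default),
    # which is what torch.cuda.current_device() returns here:
    x = torch.randn(2, 8).to(torch.cuda.current_device())
    print(model(x).shape)                  # torch.Size([2, 4])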
@@ -147,7 +146,7 @@ def infer(prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, frames=64):
 
             imtext_cond = torch.cat([text_emb, img_emb], dim=1)
 
-            fs = torch.tensor([fs], dtype=torch.long, device=model.device)
+            fs = torch.tensor([fs], dtype=torch.long, device=torch.cuda.current_device())
             cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat]}
 
             batch_samples = batch_ddim_sampling(model.module, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale)
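For reference, these conditioning tensors are assembled inside the torch.no_grad() / torch.cuda.amp.autocast() block opened in the previous hunk, so no autograd graph is recorded and CUDA ops run in float16 where safe. A minimal, self-contained sketch of that inference pattern (the Linear layer is only a placeholder):

import torch
import torch.nn as nn

if torch.cuda.is_available():
    net = nn.Linear(16, 16).cuda()       # placeholder for the real model
    x = torch.randn(1, 16, device="cuda")

    # Inference context used in infer(): no autograd graph, and CUDA
    # matmuls autocast to float16 to save memory.
    with torch.no_grad(), torch.cuda.amp.autocast():
        y = net(x)

    print(y.dtype)          # torch.float16 (produced under autocast)
    print(y.requires_grad)  # False (no_grad was active)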
@@ -165,6 +164,7 @@ def infer(prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, frames=64):
             return None
     finally:
         torch.cuda.empty_cache()
+
 
 i2v_examples = [
     ['A man playing guitar in an astronaut suit', 30, 7.5, 1.0, 6, 123, 64],
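The added trailing blank line is cosmetic; the surrounding try/finally is the standard pattern for releasing cached GPU memory whether or not generation succeeds. A minimal sketch of the same cleanup structure (run_generation is a hypothetical stand-in for infer):

import torch

def run_generation(step):             # hypothetical stand-in for infer()
    try:
        if step is None:
            raise ValueError("no input")
        return step * 2               # placeholder for the real sampling work
    except Exception as e:
        print(f"Error during inference: {e}")
        return None
    finally:
        # Always runs, success or failure: hand cached CUDA blocks back
        # to the allocator so other calls/processes can use the memory.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print(run_generation(3))      # 6
print(run_generation(None))   # prints the error message, then None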
 
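One more note on the unchanged noise_shape line in the second hunk: the VAE behind these latent video models downsamples each spatial dimension by a factor of 8, which is where the // 8 comes from. A quick sanity check, with an assumed 320x512 resolution (app.py defines resolution elsewhere, and channels=4 is only a typical value, not taken from this diff):

# Assumed values for illustration; app.py defines `resolution` (and the
# model supplies `channels`) elsewhere.
batch_size, channels, frames = 1, 4, 64
resolution = (320, 512)  # (height, width)

h, w = resolution[0] // 8, resolution[1] // 8   # 8x spatial downsampling in the VAE
noise_shape = [batch_size, channels, frames, h, w]
print(noise_shape)  # [1, 4, 64, 40, 64]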