Update app.py
app.py CHANGED
@@ -28,7 +28,7 @@ from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
 from transformers.image_transforms import convert_to_rgb
 import spaces
 
-
+@spaces.GPU
 def auto_inpainting(video_input, masked_video, mask, prompt, image, vae, text_encoder, image_encoder, diffusion, model, device, cfg_scale, img_cfg_scale, negative_prompt=""):
     global use_fp16
     image_prompt_embeds = None
@@ -83,7 +83,7 @@ def auto_inpainting(video_input, masked_video, mask, prompt, image, vae, text_en
     video_clip = vae.decode(video_clip / 0.18215).sample # [16, 3, 256, 256]
     return video_clip
 
-
+@spaces.GPU
 def auto_inpainting_temp_split(video_input, masked_video, mask, prompt, image, vae, text_encoder, image_encoder, diffusion, model, device, scfg_scale, tcfg_scale, img_cfg_scale, negative_prompt=""):
     global use_fp16
     image_prompt_embeds = None
@@ -213,8 +213,17 @@ init_model()
 # ========================================
 # Video Generation
 # ========================================
-
+@spaces.GPU
 def video_generation(text, image, scfg_scale, tcfg_scale, img_cfg_scale, diffusion):
+    global device
+    global output_path
+    global use_fp16
+    global model
+    global diffusion
+    global vae
+    global text_encoder
+    global image_encoder
+    global clip_image_processor
     with torch.no_grad():
         print("begin generation", flush=True)
         transform_video = transforms.Compose([
@@ -243,8 +252,17 @@ def video_generation(text, image, scfg_scale, tcfg_scale, img_cfg_scale, diffusi
 # ========================================
 # Video Prediction
 # ========================================
-
+@spaces.GPU
 def video_prediction(text, image, scfg_scale, tcfg_scale, img_cfg_scale, preframe, diffusion):
+    global device
+    global output_path
+    global use_fp16
+    global model
+    global diffusion
+    global vae
+    global text_encoder
+    global image_encoder
+    global clip_image_processor
     with torch.no_grad():
         print("begin generation", flush=True)
         transform_video = transforms.Compose([
@@ -280,7 +298,7 @@ def video_prediction(text, image, scfg_scale, tcfg_scale, img_cfg_scale, prefram
 # ========================================
 # Judge Generation or Prediction
 # ========================================
-
+@spaces.GPU
 def gen_or_pre(text_input, image_input, scfg_scale, tcfg_scale, img_cfg_scale, preframe_input, diffusion_step):
     default_step = [25, 40, 50, 100, 125, 200, 250]
     difference = [abs(item - diffusion_step) for item in default_step]
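
In short, the commit decorates each GPU-touching entry point with @spaces.GPU from the Hugging Face spaces package, which is how a ZeroGPU Space requests a GPU only for the duration of a call, and it declares the module-level handles (model, vae, text_encoder, and so on) as globals inside video_generation and video_prediction. A minimal sketch of the decorator pattern follows; the tiny model and the run() function below are illustrative stand-ins, not the pipeline this Space actually loads in app.py:

# Minimal ZeroGPU sketch; model and run() are placeholders, not app.py's code.
import spaces
import torch

model = torch.nn.Linear(8, 8)  # created at import time, before any GPU is attached

@spaces.GPU  # ZeroGPU allocates a GPU only while this function is running
def run(x: torch.Tensor) -> torch.Tensor:
    model.to("cuda")                      # move weights onto the just-allocated GPU
    with torch.no_grad():
        return model(x.to("cuda")).cpu()  # return on CPU so the result outlives the GPU lease

Decorating the top-level dispatcher gen_or_pre as well keeps a single GPU allocation open across whichever branch it calls; outside of a Space the decorator is effectively a no-op, so the same file should still run unchanged on a local GPU machine.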