Try to use 2 models: one optimized for 25 f/s, another for 14 f/s (#17)
- Try to use 2 models: one optimized for 25 f/s, another for 14 f/s (4c56c8bedde0be0730d2ebb4814ee4b2ae8fdae1)
Co-authored-by: Fabrice TIERCELIN <[email protected]>
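
The change routes generation by the requested frame rate: slider values above 14 f/s use a checkpoint fine-tuned for 25 f/s output, everything else uses the original 14 f/s checkpoint. A minimal sketch of that rule, using the pipeline names introduced in the diff below (pick_pipeline is a hypothetical helper for illustration; app.py inlines the test instead):

from diffusers import StableVideoDiffusionPipeline

def pick_pipeline(fps_id: int,
                  fps25Pipe: StableVideoDiffusionPipeline,
                  fps14Pipe: StableVideoDiffusionPipeline) -> StableVideoDiffusionPipeline:
    # Hypothetical helper: above 14 f/s, return the pipeline loaded from the
    # xt-1-1 checkpoint (tuned for 25 f/s); otherwise the base 14 f/s one.
    return fps25Pipe if fps_id > 14 else fps14Pipe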
app.py
CHANGED
@@ -12,10 +12,15 @@ from PIL import Image
 import random
 import spaces
 
-pipe = StableVideoDiffusionPipeline.from_pretrained(
+fps25Pipe = StableVideoDiffusionPipeline.from_pretrained(
     "vdo/stable-video-diffusion-img2vid-xt-1-1", torch_dtype=torch.float16, variant="fp16"
 )
-pipe.to("cuda")
+fps25Pipe.to("cuda")
+
+fps14Pipe = StableVideoDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid", torch_dtype=torch.float16, variant="fp16"
+)
+fps14Pipe.to("cuda")
 
 max_64_bit_int = 2**63 - 1
 
@@ -44,7 +49,10 @@ def sample(
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
 
-    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
+    if 14 < fps_id:
+        frames = fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
+    else:
+        frames = fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
 
     return video_path, gr.update(label="Generated frames in *." + frame_format + " format", format = frame_format, value = frames), seed
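
Put together, the affected part of app.py reads roughly as follows after this commit. Model IDs, pipeline names, and call arguments are taken verbatim from the diff; generate_frames is a hypothetical wrapper added here for readability (the real sample() inlines this dispatch), and the Gradio UI, image preprocessing, and seeding from the rest of the file are elided.

import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video

# Checkpoint fine-tuned for 25 f/s generation.
fps25Pipe = StableVideoDiffusionPipeline.from_pretrained(
    "vdo/stable-video-diffusion-img2vid-xt-1-1", torch_dtype=torch.float16, variant="fp16"
)
fps25Pipe.to("cuda")

# Original SVD checkpoint, trained for 14 f/s generation.
fps14Pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid", torch_dtype=torch.float16, variant="fp16"
)
fps14Pipe.to("cuda")

def generate_frames(image, fps_id, decoding_t, generator,
                    motion_bucket_id, noise_aug_strength):
    # Hypothetical wrapper around the dispatch that sample() performs inline.
    pipe = fps25Pipe if 14 < fps_id else fps14Pipe
    return pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=noise_aug_strength,
        num_frames=25,
    ).frames[0]

Note that both branches still request num_frames=25, exactly as in the diff; fps_id only selects the checkpoint and sets the encoding rate in export_to_video(frames, video_path, fps=fps_id).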