cakemus committed on
Commit 8a36720 · 1 Parent(s): 57a950e
Files changed (1)
  1. app.py +30 -65
app.py CHANGED
@@ -3,7 +3,6 @@ import spaces
 #import gradio.helpers
 import torch
 import os
-import shutil
 from glob import glob
 from pathlib import Path
 from typing import Optional
@@ -18,40 +17,19 @@ from huggingface_hub import hf_hub_download
 
 #gradio.helpers.CACHED_FOLDER = '/data/cache'
 
-# OPTIONAL: Clear caches at startup to free space
-hf_cache = os.path.expanduser("~/.cache/huggingface")
-torch_cache = os.path.expanduser("~/.cache/torch")
-if os.path.exists(hf_cache):
-    shutil.rmtree(hf_cache)
-if os.path.exists(torch_cache):
-    shutil.rmtree(torch_cache)
-
-# Configure ZeroGPU to use memory instead of disk
-from spaces.zero.config import Config
-Config.zerogpu_offload_dir = None  # Disable disk offloading to prevent disk space issues
-
 # Load the pipeline with authentication token
 pipe = StableVideoDiffusionPipeline.from_pretrained(
     "stabilityai/stable-video-diffusion-img2vid-xt",
     torch_dtype=torch.float16,
     variant="fp16",
-    use_auth_token=os.getenv("HUGGINGFACE_TOKEN")  # Fetch the token from environment if set
 )
 pipe.to("cuda")
+#pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
 max_64_bit_int = 2**63 - 1
 
-def clean_outputs(output_folder: str, keep: int = 1):
-    """
-    Remove old video files to prevent using all disk space.
-    Keeps the most recent <keep> files.
-    """
-    files = sorted(glob(os.path.join(output_folder, "*.mp4")), key=os.path.getmtime)
-    if len(files) > keep:
-        for old_file in files[:-keep]:
-            os.remove(old_file)
-
-@spaces.GPU(duration=250)
+@spaces.GPU(duration=120)
 def sample(
     image: Image,
     seed: Optional[int] = 42,
@@ -76,75 +54,62 @@ def sample(
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
 
-    # Reduce num_frames from 25 to 10 to consume less space
-    frames = pipe(
-        image,
-        decode_chunk_size=decoding_t,
-        generator=generator,
-        motion_bucket_id=motion_bucket_id,
-        noise_aug_strength=0.1,
-        num_frames=10  # reduced from 25
-    ).frames[0]
-
+    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
     export_to_video(frames, video_path, fps=fps_id)
     torch.manual_seed(seed)
-
-    # Clean up old videos to prevent filling disk
-    clean_outputs(output_folder, keep=2)
 
     return video_path, seed
 
 def resize_image(image, output_size=(1024, 576)):
     # Calculate aspect ratios
-    target_aspect = output_size[0] / output_size[1]
-    image_aspect = image.width / image.height
+    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
+    image_aspect = image.width / image.height  # Aspect ratio of the original image
 
     # Resize then crop if the original image is larger
     if image_aspect > target_aspect:
+        # Resize the image to match the target height, maintaining aspect ratio
         new_height = output_size[1]
         new_width = int(new_height * image_aspect)
         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+        # Calculate coordinates for cropping
        left = (new_width - output_size[0]) / 2
        top = 0
        right = (new_width + output_size[0]) / 2
        bottom = output_size[1]
    else:
+        # Resize the image to match the target width, maintaining aspect ratio
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+        # Calculate coordinates for cropping
        left = 0
        top = (new_height - output_size[1]) / 2
        right = output_size[0]
        bottom = (new_height + output_size[1]) / 2
 
+    # Crop the image
     cropped_image = resized_image.crop((left, top, right, bottom))
     return cropped_image
 
 with gr.Blocks() as demo:
-    gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
-    #### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `~4s` vid from a single image at (`10 frames` at `6 fps`). This demo uses [🧨 diffusers](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd) for low VRAM usage.
-    ''')
-    with gr.Row():
-        with gr.Column():
-            image = gr.Image(label="Upload your image", type="pil")
-            generate_btn = gr.Button("Generate")
-        video = gr.Video()
-    with gr.Accordion("Advanced options", open=False):
-        seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-        motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
-        fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be num_frames/fps", value=6, minimum=5, maximum=30)
-
-    # Resize on upload
-    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
-
-    # Generate with sample() function
-    generate_btn.click(
-        fn=sample,
-        inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id],
-        outputs=[video, seed],
-        api_name="video"
-    )
+    gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
+    #### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `4s` vid from a single image at (`25 frames` at `6 fps`). this demo uses [🧨 diffusers for low VRAM and fast generation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd).
+    ''')
+    with gr.Row():
+        with gr.Column():
+            image = gr.Image(label="Upload your image", type="pil")
+            generate_btn = gr.Button("Generate")
+        video = gr.Video()
+    with gr.Accordion("Advanced options", open=False):
+        seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
+        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+        motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
+        fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
+
+    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
+    generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
+
 
 if __name__ == "__main__":
-    demo.launch(show_api=False)
+    #demo.queue(max_size=20, api_open=False)
+    demo.launch(share=True, show_api=False)
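
For reference, here is a condensed, standalone sketch of the generation path the updated app.py now runs (fp16 SVD-XT pipeline, 1024x576 input, 25 frames, export at the chosen fps). It is not part of this commit; the input/output filenames and the decode_chunk_size value are illustrative assumptions.

import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video, load_image

# Load the same checkpoint the Space uses, in fp16 on the GPU.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.to("cuda")

# Illustrative input path; the app instead receives a PIL image from Gradio
# and crops it to 1024x576 in resize_image().
image = load_image("input.png").resize((1024, 576))

generator = torch.Generator(device="cuda").manual_seed(42)
frames = pipe(
    image,
    decode_chunk_size=3,          # plays the role of decoding_t in sample(); lower = less VRAM
    generator=generator,
    motion_bucket_id=127,
    noise_aug_strength=0.1,
    num_frames=25,
).frames[0]
export_to_video(frames, "output.mp4", fps=6)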