cakemus committed
Commit 45a6a13 · 1 Parent(s): b56981c

video concat test

Files changed (1)
  1. app.py +180 -74
app.py CHANGED
@@ -15,6 +15,10 @@ import uuid
 import random
 from huggingface_hub import hf_hub_download
 
+# NEW CODE HERE:
+# If moviepy is not installed by default, ensure your Space installs it (e.g. via requirements.txt).
+from moviepy.editor import VideoFileClip, concatenate_videoclips
+
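+# Illustrative requirements.txt sketch covering the imports in this file (package
+# list inferred from the imports, an assumption, with no version pins):
+#   diffusers, torch, gradio, spaces, huggingface_hub, Pillow, moviepy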
 #gradio.helpers.CACHED_FOLDER = '/data/cache'
 
 pipe = StableVideoDiffusionPipeline.from_pretrained(
@@ -26,103 +30,205 @@ pipe.to("cuda")
 
 max_64_bit_int = 2**63 - 1
 
-@spaces.GPU(duration=120)
-def sample(
-    image: Image,
-    seed: Optional[int] = 42,
-    randomize_seed: bool = True,
-    motion_bucket_id: int = 127,
-    fps_id: int = 6,
-    version: str = "svd_xt",
-    cond_aug: float = 0.02,
-    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
-    device: str = "cuda",
-    output_folder: str = "outputs",
-    progress=gr.Progress(track_tqdm=True)
-):
-    if image.mode == "RGBA":
-        image = image.convert("RGB")
-
-    if(randomize_seed):
-        seed = random.randint(0, max_64_bit_int)
-    generator = torch.manual_seed(seed)
-
-    os.makedirs(output_folder, exist_ok=True)
-    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
-    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-
-    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
-    export_to_video(frames, video_path, fps=fps_id)
-    torch.manual_seed(seed)
-
-    return video_path, seed
-
 def resize_image(image, output_size=(1024, 576)):
-    # Calculate aspect ratios
-    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
-    image_aspect = image.width / image.height  # Aspect ratio of the original image
+    """
+    Resizes/crops the image to match a target resolution without
+    distorting aspect ratio.
+    """
+    target_aspect = output_size[0] / output_size[1]
+    image_aspect = image.width / image.height
 
-    # Resize then crop if the original image is larger
     if image_aspect > target_aspect:
-        # Resize the image to match the target height, maintaining aspect ratio
         new_height = output_size[1]
         new_width = int(new_height * image_aspect)
         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-        # Calculate coordinates for cropping
         left = (new_width - output_size[0]) / 2
         top = 0
         right = (new_width + output_size[0]) / 2
         bottom = output_size[1]
     else:
-        # Resize the image to match the target width, maintaining aspect ratio
         new_width = output_size[0]
         new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
-        # Calculate coordinates for cropping
         left = 0
         top = (new_height - output_size[1]) / 2
         right = output_size[0]
         bottom = (new_height + output_size[1]) / 2
 
-    # Crop the image
     cropped_image = resized_image.crop((left, top, right, bottom))
     return cropped_image
 
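+# Worked example (illustrative): a 4000x3000 upload has aspect 1.33, below the
+# 1024/576 ≈ 1.78 target, so it is resized to 1024x768 and center-cropped with
+# box (0, 96, 1024, 672) down to 1024x576.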
+# NEW CODE HERE:
+def combine_videos(video_paths, output_path="outputs/final_long_video.mp4"):
+    """
+    Concatenate a list of MP4 videos into one MP4.
+    """
+    clips = [VideoFileClip(vp) for vp in video_paths]
+    final_clip = concatenate_videoclips(clips, method="compose")
+    final_clip.write_videofile(output_path, codec="libx264", fps=clips[0].fps, audio=False)
+    # Release file handles so the snippet files can be cleaned up later.
+    for clip in clips:
+        clip.close()
+    final_clip.close()
+    return output_path
+
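+# Usage sketch (hypothetical paths): combine_videos(["outputs/000000.mp4", "outputs/000001.mp4"])
+# writes a single MP4 at the fps of the first clip and returns its path.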
+# NEW CODE HERE:
+# A helper that returns both the generated frames and the snippet path,
+# so the caller can reuse the last frame as the next snippet's input.
+def generate_snippet(
+    init_image: Image,
+    seed: int,
+    motion_bucket_id: int,
+    fps_id: int,
+    decoding_t: int = 3,
+    device: str = "cuda",
+    output_folder: str = "outputs"
+):
+    """
+    Generate a short snippet from `init_image` using the pipeline.
+    Returns: (frames, video_path)
+    """
+    generator = torch.manual_seed(seed)
+    os.makedirs(output_folder, exist_ok=True)
+    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
+    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
+
+    # Generate frames
+    result = pipe(
+        init_image,
+        decode_chunk_size=decoding_t,
+        generator=generator,
+        motion_bucket_id=motion_bucket_id,
+        noise_aug_strength=0.1,
+        num_frames=25
+    )
+    frames = result.frames[0]  # a list of PIL images
+
+    # Save snippet
+    export_to_video(frames, video_path, fps=fps_id)
+
+    return frames, video_path
+
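+# Usage sketch (assumes `img` is a 1024x576 RGB PIL image):
+#   frames, path = generate_snippet(img, seed=42, motion_bucket_id=127, fps_id=6)
+#   next_input = frames[-1]  # the last frame chains into the next snippet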
+@spaces.GPU(duration=120)
+def sample_long(
+    image: Image,
+    seed: Optional[int] = 42,
+    randomize_seed: bool = True,
+    motion_bucket_id: int = 127,
+    fps_id: int = 6,
+    cond_aug: float = 0.02,
+    decoding_t: int = 3,  # Number of frames decoded at a time! Lower this if VRAM is an issue.
+    device: str = "cuda",
+    output_folder: str = "outputs",
+    progress=gr.Progress(track_tqdm=True)
+):
+    """
+    Generate 5 snippets in a row. Each new snippet starts from the last frame
+    of the previous snippet. Returns the path to the final, concatenated MP4.
+    """
+    if image.mode == "RGBA":
+        image = image.convert("RGB")
+
+    if randomize_seed:
+        seed = random.randint(0, max_64_bit_int)
+    torch.manual_seed(seed)
+
+    snippet_paths = []
+    current_image = image
+
+    for _ in range(5):
+        frames, snippet_path = generate_snippet(
+            init_image=current_image,
+            seed=seed,
+            motion_bucket_id=motion_bucket_id,
+            fps_id=fps_id,
+            decoding_t=decoding_t,
+            device=device,
+            output_folder=output_folder
+        )
+        snippet_paths.append(snippet_path)
+
+        # Use the last frame (a PIL image) as the start of the next snippet
+        current_image = frames[-1]
+
+        # Optional: re-seed each iteration for more variety between snippets;
+        # keeping the same seed gives a more cohesive "style". To randomize, uncomment:
+        # seed = random.randint(0, max_64_bit_int)
+
+    # Concatenate all snippets
+    final_video_path = os.path.join(output_folder, "final_long_video.mp4")
+    final_video_path = combine_videos(snippet_paths, output_path=final_video_path)
+
+    return final_video_path, seed
+
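+# At the default fps_id of 6, each 25-frame snippet lasts about 25/6 ≈ 4.2 s,
+# so five chained snippets yield roughly a 21-second final video.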
 with gr.Blocks() as demo:
-    gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
-    #### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `4s` vid from a single image at (`25 frames` at `6 fps`). this demo uses [🧨 diffusers for low VRAM and fast generation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd).
-    ''')
-    with gr.Row():
-        with gr.Column():
-            image = gr.Image(label="Upload your image", type="pil")
-            generate_btn = gr.Button("Generate")
-        video = gr.Video()
-    with gr.Accordion("Advanced options", open=False):
-        seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-        motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
-        fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
-
-    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
-    generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
-    gr.Examples(
-        examples=[
-            "images/blink_meme.png",
-            "images/confused2_meme.png",
-            "images/disaster_meme.png",
-            "images/distracted_meme.png",
-            "images/hide_meme.png",
-            "images/nazare_meme.png",
-            "images/success_meme.png",
-            "images/willy_meme.png",
-            "images/wink_meme.png"
-        ],
-        inputs=image,
-        outputs=[video, seed],
-        fn=sample,
-        cache_examples="lazy",
-    )
+    gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT
+    ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt),
+    [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets),
+    [stability's ui waitlist](https://stability.ai/contact))
+    #### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)):
+    Generate a longer video by chaining together multiple short snippets.
+    ''')
+
+    with gr.Row():
+        with gr.Column():
+            image = gr.Image(label="Upload your image", type="pil")
+            generate_btn = gr.Button("Generate Long Video (5 snippets)")
+        video = gr.Video()
+
+    with gr.Accordion("Advanced options", open=False):
+        seed = gr.Slider(
+            label="Seed",
+            value=42,
+            randomize=True,
+            minimum=0,
+            maximum=max_64_bit_int,
+            step=1
+        )
+        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+        motion_bucket_id = gr.Slider(
+            label="Motion bucket id",
+            info="Controls how much motion to add/remove from the image",
+            value=127,
+            minimum=1,
+            maximum=255
+        )
+        fps_id = gr.Slider(
+            label="Frames per second",
+            info="The length of your video in seconds will be 25/fps",
+            value=6,
+            minimum=5,
+            maximum=30
+        )
+
+    # Automatically resize on image upload
+    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
+
+    # NEW: Generate a *long* video composed of 5 short snippets
+    generate_btn.click(
+        fn=sample_long,
+        inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id],
+        outputs=[video, seed],
+        api_name="video"
+    )
+
+    # Examples work as before, but the pipeline now chains 5 snippets by default.
+    gr.Examples(
+        examples=[
+            "images/blink_meme.png",
+            "images/confused2_meme.png",
+            "images/disaster_meme.png",
+            "images/distracted_meme.png",
+            "images/hide_meme.png",
+            "images/nazare_meme.png",
+            "images/success_meme.png",
+            "images/willy_meme.png",
+            "images/wink_meme.png"
+        ],
+        inputs=image,
+        outputs=[video, seed],
+        fn=sample_long,
+        cache_examples="lazy",
+    )
 
 if __name__ == "__main__":
-    #demo.queue(max_size=20, api_open=False)
     demo.launch(share=True, show_api=False)