ruslanmv committed on
Commit
8c3e070
·
1 Parent(s): 6dc42a4

First commit

Files changed (2)
  1. app.py +526 -0
  2. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,526 @@
+ import os
+ import shutil
+ import multiprocessing
+ import subprocess
+ import nltk
+ import gradio as gr
+ import matplotlib.pyplot as plt
+ import gc
+ from huggingface_hub import snapshot_download, hf_hub_download
+ from typing import List
+ import numpy as np
+ import random
+ import spaces
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, CLIPFeatureExtractor
+ from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
+ from diffusers.utils import export_to_video
+ from moviepy.editor import VideoFileClip, CompositeVideoClip, TextClip
+ import moviepy.editor as mpy
+ from PIL import Image, ImageDraw, ImageFont
+ from mutagen.mp3 import MP3
+ from gtts import gTTS
+ from pydub import AudioSegment
+ import uuid
+ from safetensors.torch import load_file
+ import textwrap
+
+ # -------------------------------------------------------------------
+ # No more ImageMagick dependency!
+ # -------------------------------------------------------------------
+ print("ImageMagick dependency removed. Using Pillow for text rendering.")
+
+ # Ensure NLTK's 'punkt_tab' (and other data) is present
+ nltk.download('punkt_tab', quiet=True)
+ nltk.download('punkt', quiet=True)
+
+ # -------------------------------------------------------------------
+ # GPU / Environment Setup
+ # -------------------------------------------------------------------
+ def log_gpu_memory():
+     """Log GPU memory usage."""
+     if torch.cuda.is_available():
+         print(subprocess.check_output('nvidia-smi').decode('utf-8'))
+     else:
+         print("CUDA is not available. Cannot log GPU memory.")
+
+ def check_gpu_availability():
+     """Print GPU availability and device details."""
+     if torch.cuda.is_available():
+         print(f"CUDA devices: {torch.cuda.device_count()}")
+         print(f"Current device: {torch.cuda.current_device()}")
+         print(torch.cuda.get_device_properties(torch.cuda.current_device()))
+     else:
+         print("CUDA is not available. Running on CPU.")
+
+ check_gpu_availability()
+
+ # Ensure proper multiprocessing start method
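+ # ("spawn" is generally safer than the default "fork" when child processes may touch CUDA.)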
+ multiprocessing.set_start_method("spawn", force=True)
+
+ # -------------------------------------------------------------------
+ # Constants & Model Setup
+ # -------------------------------------------------------------------
+ dtype = torch.float16
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE_720 = 720  # Maximum dimension is capped at 720 pixels (720p)
+ MAX_IMAGE_SIZE = MAX_IMAGE_SIZE_720
+
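+ # Reference table of common output sizes per aspect ratio (informational; the UI below exposes free-form width/height sliders).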
+ RESOLUTIONS = {
+     "16:9": [
+         {"resolution": "360p", "width": 640, "height": 360},
+         {"resolution": "480p", "width": 854, "height": 480},
+         {"resolution": "720p", "width": 1280, "height": 720},
+         # {"resolution": "1080p", "width": 1920, "height": 1080},  # Resolutions above 720p are disabled
+     ],
+     "4:3": [
+         {"resolution": "360p", "width": 480, "height": 360},
+         {"resolution": "480p", "width": 640, "height": 480},
+         {"resolution": "720p", "width": 960, "height": 720},
+         # {"resolution": "1080p", "width": 1440, "height": 1080},  # Resolutions above 720p are disabled
+     ],
+     "1:1": [
+         {"resolution": "360p", "width": 360, "height": 360},
+         {"resolution": "480p", "width": 480, "height": 480},
+         {"resolution": "720p", "width": 720, "height": 720},
+         # {"resolution": "1080p", "width": 1080, "height": 1080},  # Resolutions above 720p are disabled
+         # {"resolution": "1920p", "width": 1920, "height": 1920},  # Resolutions above 720p are disabled
+     ],
+     "9:16": [
+         {"resolution": "360p", "width": 360, "height": 640},
+         {"resolution": "480p", "width": 480, "height": 854},
+         {"resolution": "720p", "width": 720, "height": 1280},
+         # {"resolution": "1080p", "width": 1080, "height": 1920},  # Resolutions above 720p are disabled
+     ],
+ }
+
+
+ DESCRIPTION = (
+     "Video Story Generator with Audio\n"
+     "Video generation with Artificial Intelligence via AnimateDiff, DistilBART, and gTTS."
+ )
+ TITLE = "Video Story Generator with Audio (AnimateDiff, DistilBART, and gTTS)"
+
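+ # @spaces.GPU() requests a GPU worker when running on Hugging Face Spaces (ZeroGPU); elsewhere it is effectively a no-op.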
+ @spaces.GPU()
+ def load_text_summarization_model():
+     """Load the tokenizer and model for text summarization on GPU/CPU."""
+     print("Loading text summarization model...")
+     tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
+     model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
+     return tokenizer, model
+
+ tokenizer, model = load_text_summarization_model()
+
+ # Base models for AnimateDiff-Lightning
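+ # (Community Stable Diffusion 1.5 checkpoints; AnimateDiff-Lightning is designed for SD 1.5 bases.)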
+ bases = {
+     "Cartoon": "frankjoshua/toonyou_beta6",
+     "Realistic": "emilianJR/epiCRealism",
+     "3d": "Lykon/DreamShaper",
+     "Anime": "Yntec/mistoonAnime2"
+ }
+
+ # Keep track of what's loaded to avoid reloading each time
+ step_loaded = None
+ base_loaded = "Realistic"
+ motion_loaded = None
+
+ # Initialize AnimateDiff pipeline
+ if not torch.cuda.is_available():
+     raise NotImplementedError("No GPU detected!")
+
+ pipe = AnimateDiffPipeline.from_pretrained(
+     bases[base_loaded],
+     torch_dtype=dtype
+ ).to(device)
+
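+ # AnimateDiff-Lightning is distilled for few-step sampling; its model card pairs it with Euler, trailing timesteps, and a linear beta schedule.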
+ pipe.scheduler = EulerDiscreteScheduler.from_config(
+     pipe.scheduler.config,
+     timestep_spacing="trailing",
+     beta_schedule="linear"
+ )
+
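+ # CLIP feature extractor (loaded here but not referenced again in this script).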
+ feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")
+
+
+ # -------------------------------------------------------------------
+ # Function: Generate Short Animation
+ # -------------------------------------------------------------------
+ def generate_short_animation(
+     prompt_text: str,
+     base: str = "Realistic",
+     motion: str = "",
+     step: int = 4,
+     seed: int = 42,
+     width: int = 512,
+     height: int = 512,
+ ) -> str:
+     """
+     Generates a short animated video (MP4) from a given prompt using AnimateDiff-Lightning.
+     Returns the local path to the resulting MP4.
+     """
+     global step_loaded
+     global base_loaded
+     global motion_loaded
+
+     # 1) Possibly reload the correct step weights
+     if step_loaded != step:
+         repo = "ByteDance/AnimateDiff-Lightning"
+         ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
+         pipe.unet.load_state_dict(
+             load_file(hf_hub_download(repo, ckpt), device=device),
+             strict=False
+         )
+         step_loaded = step
+
+     # 2) Possibly reload the correct base model
+     if base_loaded != base:
+         pipe.unet.load_state_dict(
+             torch.load(
+                 hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin"),
+                 map_location=device
+             ),
+             strict=False
+         )
+         base_loaded = base
+
+     # 3) Possibly unload/load the motion LoRA
+     if motion_loaded != motion:
+         pipe.unload_lora_weights()
+         if motion:
+             pipe.load_lora_weights(motion, adapter_name="motion")
+             pipe.set_adapters(["motion"], [0.7])  # weighting can be adjusted
+         motion_loaded = motion
+
+     # 4) Generate frames
+     print(f"[INFO] Generating short animation for prompt: '{prompt_text}' ...")
+     generator = torch.Generator(device=device).manual_seed(seed) if seed is not None else None
+     output = pipe(
+         prompt=prompt_text,
+         guidance_scale=1.2,
+         num_inference_steps=step,
+         generator=generator,
+         width=width,
+         height=height
+     )
+
+     # 5) Export frames to a short MP4
+     short_mp4_path = f"short_{uuid.uuid4().hex}.mp4"
+     export_to_video(output.frames[0], short_mp4_path, fps=10)
+     return short_mp4_path
+
+ # -------------------------------------------------------------------
+ # Function: Merge MP3 files
+ # -------------------------------------------------------------------
+ def merge_audio_files(mp3_names: List[str]) -> str:
+     """
+     Merges a list of MP3 files into a single MP3 file.
+     Returns the path to the merged MP3 file.
+     """
+     combined = AudioSegment.empty()
+     for f_name in mp3_names:
+         audio = AudioSegment.from_mp3(f_name)
+         combined += audio
+     export_path = f"merged_audio_{uuid.uuid4().hex}.mp3"  # Dynamic output path for merged audio
+     combined.export(export_path, format="mp3")
+     print(f"DEBUG: Audio files merged and saved to {export_path}")
+     return export_path
+
+
+ # -------------------------------------------------------------------
+ # Function: Overlay Subtitles on a Video
+ # -------------------------------------------------------------------
+
+ def add_subtitles_to_video(input_video_path: str, text: str, duration: float) -> str:
+     """
+     Overlays `text` as subtitles over the entire `input_video_path` for `duration` seconds using Pillow.
+     Returns the path to the newly generated MP4 with subtitles.
+     """
+     base_clip = VideoFileClip(input_video_path)
+     final_dur = max(duration, base_clip.duration)
+
+     def make_frame(t):
+         frame_pil = Image.fromarray(base_clip.get_frame(t))
+         draw = ImageDraw.Draw(frame_pil)
+         try:
+             font = ImageFont.truetype("arial.ttf", 40)  # Change the font size if needed
+         except IOError:
+             font = ImageFont.load_default()  # Use default font if Arial is not found
+
+         # Correctly compute text size using `textbbox()`
+         bbox = draw.textbbox((0, 0), text, font=font)
+         textwidth, textheight = bbox[2] - bbox[0], bbox[3] - bbox[1]
+
+         x = (frame_pil.width - textwidth) / 2
+         y = frame_pil.height - 70 - textheight  # Position at the bottom
+
+         draw.text((x, y), text, font=font, fill=(255, 255, 0))  # Yellow color
+         return np.array(frame_pil)
+
+     # Create the video clip without a `size` argument
+     subtitled_clip = mpy.VideoClip(make_frame, duration=final_dur)
+
+     # Composite the subtitled clip over the original video
+     final_clip = CompositeVideoClip([base_clip, subtitled_clip.set_position((0, 0))])
+     final_clip = final_clip.set_duration(final_dur)
+
+     out_path = f"sub_{uuid.uuid4().hex}.mp4"
+     final_clip.write_videofile(out_path, fps=24, logger=None)
+
+     # Cleanup
+     base_clip.close()
+     final_clip.close()
+     subtitled_clip.close()
+
+     return out_path
+
+
+
+ # -------------------------------------------------------------------
+ # Main Function: Generate Output Video
+ # -------------------------------------------------------------------
+ @spaces.GPU()
+ def get_output_video(text, base_model_name, motion_name, num_inference_steps_backend, randomize_seed, seed, width, height):
+     """
+     Summarize the user prompt, generate a short animated video for each sentence,
+     overlay subtitles, merge all into a final video with a single audio track.
+     """
+     print("DEBUG: Starting get_output_video function...")
+
+     # Summarize the input text
+     print("DEBUG: Summarizing text...")
+     device_local = "cuda" if torch.cuda.is_available() else "cpu"
+     model.to(device_local)  # Move summarization model to GPU/CPU as needed
+
+     inputs = tokenizer(
+         text,
+         max_length=1024,
+         truncation=True,
+         return_tensors="pt"
+     ).to(device_local)
+
+     summary_ids = model.generate(inputs["input_ids"])
+     summary = tokenizer.batch_decode(
+         summary_ids,
+         skip_special_tokens=True,
+         clean_up_tokenization_spaces=False
+     )
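+     # Splitting on '.' typically leaves a trailing empty string, so the loop below iterates over plot[:-1].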
+     plot = list(summary[0].split('.'))  # Split summary into sentences
+     print(f"DEBUG: Summary generated: {plot}")
+
+     # Prepare seed based on randomize_seed checkbox
+     current_seed = random.randint(0, MAX_SEED) if randomize_seed else seed
+
+     # We'll generate a short video for each sentence
+     # We'll also create an audio track for each sentence
+     short_videos = []
+     mp3_names = []
+     mp3_lengths = []
+     result_no_audio = f"result_no_audio_{uuid.uuid4().hex}.mp4"  # Dynamic filename for no audio video
+     movie_final = f'result_final_{uuid.uuid4().hex}.mp4'  # Dynamic filename for final video
+     merged_audio_path = ""  # To store merged audio path for cleanup
+
+     try:  # Try-finally block to ensure cleanup
+         for i, sentence in enumerate(plot[:-1]):
+             # 1) Generate short video for this sentence
+             prompt_for_animation = f"Generate a realistic video about this: {sentence}"
+             print(f"DEBUG: Generating short video {i+1} of {len(plot)-1} ...")
+             short_mp4_path = generate_short_animation(
+                 prompt_text=prompt_for_animation,
+                 base=base_model_name,
+                 motion=motion_name,
+                 step=int(num_inference_steps_backend),
+                 seed=current_seed + i,  # Increment seed for each sentence for variation
+                 width=width,
+                 height=height
+             )
+
+             # 2) Generate audio for the sentence
+             audio_filename = f'audio_{uuid.uuid4().hex}_{i}.mp3'  # Dynamic audio filename
+             tts_obj = gTTS(text=sentence, lang='en', slow=False)
+             tts_obj.save(audio_filename)
+             audio_info = MP3(audio_filename)
+             audio_duration = audio_info.info.length
+             mp3_names.append(audio_filename)
+             mp3_lengths.append(audio_duration)
+
+             # 3) Overlay subtitles on top of the short video (using Pillow now)
+             final_clip_duration = audio_duration + 0.5  # half-second pad
+             short_subtitled_path = add_subtitles_to_video(
+                 input_video_path=short_mp4_path,
+                 text=sentence.strip(),
+                 duration=final_clip_duration
+             )
+             short_videos.append(short_subtitled_path)
+
+             # Clean up the original short clip (no subtitles)
+             os.remove(short_mp4_path)
+
+         # ----------------------------------------------------------------
+         # Merge all MP3 files into one
+         # ----------------------------------------------------------------
+         merged_audio_path = merge_audio_files(mp3_names)
+
+         # ----------------------------------------------------------------
+         # Concatenate all short subtitled videos
+         # ----------------------------------------------------------------
+         print("DEBUG: Concatenating all short videos into a single clip...")
+         clip_objects = []
+         for vid_path in short_videos:
+             clip = mpy.VideoFileClip(vid_path)
+             clip_objects.append(clip)
+
+         final_concat = mpy.concatenate_videoclips(clip_objects, method="compose")
+         final_concat.write_videofile(result_no_audio, fps=24, logger=None)
+
+         # ----------------------------------------------------------------
+         # Combine big video with merged audio
+         # ----------------------------------------------------------------
+         def combine_audio(vidname, audname, outname, fps=24):
+             print(f"DEBUG: Combining audio for video: '{vidname}'")
+             my_clip = mpy.VideoFileClip(vidname)
+             audio_background = mpy.AudioFileClip(audname)
+             final_clip = my_clip.set_audio(audio_background)
+             final_clip.write_videofile(outname, fps=fps, logger=None)
+             my_clip.close()
+             final_clip.close()
+
+         combine_audio(result_no_audio, merged_audio_path, movie_final)
+
+     finally:  # Cleanup always executes
+         print("DEBUG: Cleaning up temporary files...")
+         # Remove short subtitled videos
+         for path_ in short_videos:
+             os.remove(path_)
+         # Remove mp3 segments
+         for f_mp3 in mp3_names:
+             os.remove(f_mp3)
+         # Remove merged audio
+         if os.path.exists(merged_audio_path):
+             os.remove(merged_audio_path)
+         # Remove partial no-audio mp4
+         if os.path.exists(result_no_audio):
+             os.remove(result_no_audio)
+
+     print("DEBUG: get_output_video function completed successfully.")
+     return movie_final
+
+ # -------------------------------------------------------------------
+ # Example text (user can override)
+ # -------------------------------------------------------------------
+ text = (
+     "Once, there was a girl called Laura who went to the supermarket to buy the ingredients to make a cake. "
+     "Today is her birthday, and her friends are coming to her house to help her prepare the cake."
+ )
+
+ # -------------------------------------------------------------------
+ # Gradio Interface
+ # -------------------------------------------------------------------
+ with gr.Blocks(css="style.css") as demo:
+     gr.Markdown(
+         """
+         # Video Generator ⚡ from stories with Artificial Intelligence
+
+         Enter a story and it is summarized with the DistilBART model.
+         Each sentence of the summary is turned into a short clip with AnimateDiff and AnimateDiff-Lightning,
+         narration is synthesized with gTTS, subtitles are rendered with Pillow, and everything is combined into a single video.
+
+         **Credits**: Developed by [ruslanmv.com](https://ruslanmv.com).
+         """
+     )
+
+     with gr.Group():
+         with gr.Row():
+             input_start_text = gr.Textbox(value=text, label='Prompt')
+         with gr.Row():
+             select_base = gr.Dropdown(
+                 label='Base model',
+                 choices=["Cartoon", "Realistic", "3d", "Anime"],
+                 value=base_loaded,
+                 interactive=True
+             )
+             select_motion = gr.Dropdown(
+                 label='Motion',
+                 choices=[
+                     ("Default", ""),
+                     ("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
+                     ("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
+                     ("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
+                     ("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
+                     ("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
+                     ("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
+                     ("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
+                     ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
+                 ],
+                 value="",  # default: no motion LoRA
+                 interactive=True
+             )
+             select_step = gr.Dropdown(
+                 label='Inference steps',
+                 choices=[('1-Step', 1), ('2-Step', 2), ('4-Step', 4), ('8-Step', 8)],
+                 value=4,
+                 interactive=True
+             )
+             button_gen_video = gr.Button(
+                 scale=1,
+                 variant='primary',
+                 value="Generate Video"
+             )
+
+     with gr.Accordion("Advanced Settings", open=False):
+         seed = gr.Slider(
+             label="Seed",
+             minimum=0,
+             maximum=MAX_SEED,
+             step=1,
+             value=42,
+         )
+         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+         with gr.Row():
+             width = gr.Slider(
+                 label="Width",
+                 minimum=256,
+                 maximum=MAX_IMAGE_SIZE_720,  # capped at 720 pixels (720p maximum)
+                 step=1,
+                 value=640,  # Default width for 480p 4:3
+             )
+             height = gr.Slider(
+                 label="Height",
+                 minimum=256,
+                 maximum=MAX_IMAGE_SIZE_720,  # capped at 720 pixels (720p maximum)
+                 step=1,
+                 value=480,  # Default height for 480p 4:3
+             )
+
+
+     with gr.Column():
+         output_interpolation = gr.Video(label="Generated Video")
+
+
+
+     button_gen_video.click(
+         fn=get_output_video,
+         inputs=[input_start_text, select_base, select_motion, select_step, randomize_seed, seed, width, height],
+         outputs=output_interpolation
+     )
+
+     # Optionally, some examples
+     gr.Examples(
+         examples=[
+             ["Focus: Eiffel Tower (Animate: Clouds moving)"],
+             ["Focus: Trees In forest (Animate: Lion running)"],
+             ["Focus: Astronaut in Space"],
+             ["Focus: Group of Birds in sky (Animate: Birds Moving) (Shot From distance)"],
+             ["Focus: Statue of liberty (Shot from Drone) (Animate: Drone coming toward statue)"],
+             ["Focus: Panda in Forest (Animate: Drinking Tea)"],
+             ["Focus: Kids Playing (Season: Winter)"],
+             ["Focus: Cars in Street (Season: Rain, Daytime) (Shot from Distance) (Movement: Cars running)"]
+         ],
+         fn=get_output_video,
+         inputs=[input_start_text, select_base, select_motion, select_step, randomize_seed, seed, width, height],
+         outputs=output_interpolation,
+         cache_examples="lazy",
+     )
+
+ demo.queue().launch(debug=True, share=False)
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ accelerate
+ gradio
+ opencv-python
+ peft
+ spaces
+ git+https://github.com/huggingface/diffusers.git
+ #diffusers
+ invisible_watermark
+ transformers==4.42.4
+ xformers
+ sentencepiece
+ mutagen
+ gTTS==2.5.4
+ nltk
+ moviepy==1.0.3
+ torchvision --index-url https://download.pytorch.org/whl/cu118