File size: 1,488 Bytes
c472b15
 
7b36a3f
c472b15
 
b0267fa
 
c472b15
 
 
 
 
 
 
3141b27
 
 
 
 
 
 
 
 
 
 
 
c472b15
 
 
 
 
3141b27
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
import cv2
import numpy as np

# Load the text-to-video checkpoint using its fp16 weight variant in half precision.
pipe = DiffusionPipeline.from_pretrained(
  "damo-vilab/text-to-video-ms-1.7b",
  torch_dtype=torch.float16,
  variant="fp16",
)
# Swap in a DPM-Solver multistep scheduler built from the current scheduler's config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# Turn on the pipeline's model CPU offload.
pipe.enable_model_cpu_offload()

def _largest_allowed_below(size, allowed):
  """Return the largest value in *allowed* that is strictly below *size*.

  Falls back to the smallest allowed value when nothing is below *size*,
  so callers never hit ``max()`` on an empty sequence.
  """
  return max((res for res in allowed if res < size), default=min(allowed))


@spaces.GPU(duration=250)
def generate(prompt, num_inference_steps=25):
  """Generate a video from a text prompt and return the exported file path.

  Args:
    prompt: Text description of the video to generate.
    num_inference_steps: Number of denoising steps. Coerced to ``int``
      because a UI slider may deliver a float.

  Returns:
    The path returned by ``export_to_video`` for the resized frames.
  """
  # Pass the step count by keyword: the pipeline's second positional
  # parameter is ``height``, so a positional value would be misread.
  output = pipe(prompt, num_inference_steps=int(num_inference_steps))

  video_frames = np.asarray(output.frames)
  # Newer diffusers releases return a batch of videos shaped
  # (batch, frames, height, width, channels); keep the first video.
  if video_frames.ndim == 5:
    video_frames = video_frames[0]

  # Allowed output resolutions (all multiples of 8).
  allowed_resolutions = (16, 24, 32, 40, 48, 56, 64, 128, 256, 512)

  resized_frames = []
  for frame in video_frames:
    height, width = frame.shape[:2]
    # Pick the closest allowed resolution smaller than the original.
    new_height = _largest_allowed_below(height, allowed_resolutions)
    new_width = _largest_allowed_below(width, allowed_resolutions)
    # cv2.resize takes the target size as (width, height).
    resized_frames.append(cv2.resize(frame, (new_width, new_height)))

  video_path = export_to_video(np.array(resized_frames))
  return video_path

# Show the hint as a placeholder: the first positional argument of
# gr.Textbox is the initial value, which would be submitted as the prompt.
prompt = gr.Textbox(label="Prompt", placeholder="Enter prompt to generate a video")
# Restrict the slider to whole numbers; a step count must be an integer.
num_inference_steps = gr.Slider(10, 50, value=25, step=1, label="Inference steps")

interface = gr.Interface(
  generate,
  inputs=[prompt, num_inference_steps],
  examples=[["Astronaut riding a horse", 25], ["Darth vader surfing in waves", 20]],
  outputs="video",
  cache_examples=False,
  theme="soft",
)
# Launch in a separate statement so `interface` stays bound to the
# Interface object rather than to the return value of launch().
interface.launch()