import gradio as gr
import torch
import spaces
from diffusers import LTXConditionPipeline
from diffusers.utils import export_to_video
from gtts import gTTS
from pydub import AudioSegment
import whisper
import ffmpeg
import os
# Load pipeline
pipe = LTXConditionPipeline.from_pretrained(
"Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.float16
)
pipe.to("cuda")
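# ZeroGPU: the decorator below requests a GPU for each call of the wrapped
# function; `duration` sets how long (in seconds) the allocation is expected to last.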
@spaces.GPU(duration=120)
def generate_video(prompt):
    generator = torch.Generator("cuda").manual_seed(42)

    # Generate latent video
    latents = pipe(
        prompt=prompt,
        width=512,
        height=512,
        num_frames=24,
        output_type="latent",
        generator=generator,
        num_inference_steps=7,
    ).frames

    # Decode frames
    frames = pipe(
        prompt=prompt,
        latents=latents,
        num_frames=24,
        output_type="pil",
        generator=generator,
        num_inference_steps=7,
    ).frames[0]

    # Save as video
    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=12)
    # TTS: narrate the prompt, then convert it to WAV for Whisper
    tts = gTTS(text=prompt, lang="en")
    tts.save("voice.mp3")
    AudioSegment.from_mp3("voice.mp3").export("voice.wav", format="wav")

    # Subtitles: Whisper returns timed segments, not ready-made SRT text,
    # so format the segments as SRT entries by hand
    def srt_time(t):
        h, rem = divmod(t, 3600)
        m, s = divmod(rem, 60)
        return f"{int(h):02d}:{int(m):02d}:{int(s):02d},{int((s % 1) * 1000):03d}"

    model = whisper.load_model("base")
    result = model.transcribe("voice.wav", language="en")
    with open("subtitles.srt", "w") as f:
        for i, seg in enumerate(result["segments"], start=1):
            f.write(f"{i}\n{srt_time(seg['start'])} --> {srt_time(seg['end'])}\n{seg['text'].strip()}\n\n")
    # Merge audio + subtitles into video. Burning subtitles re-encodes the
    # video, so stream copy cannot be used; the narration is passed as a
    # second input rather than as an output option.
    video_in = ffmpeg.input(video_path)
    audio_in = ffmpeg.input("voice.mp3")
    ffmpeg.output(
        video_in,
        audio_in,
        "final.mp4",
        vf="subtitles=subtitles.srt",
        vcodec="libx264",
        acodec="aac",
        shortest=None,
        loglevel="error",
    ).run(overwrite_output=True)
    return "final.mp4"
# Gradio UI
demo = gr.Interface(fn=generate_video, inputs="text", outputs=gr.Video())
demo.launch()