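"""Text-to-Video Generator (Hugging Face Space app).

Pipeline: a user prompt is expanded via the Groq chat API, key entities are
extracted and the text summarized with Hugging Face Transformers, frames are
rendered with Stable Diffusion, and FFmpeg stitches them into an MP4 served
through a Gradio interface.
"""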
import os
import subprocess
import sys
# Function to install missing dependencies at startup (useful on a fresh Space)
def install_dependencies():
    packages = [
        "groq",
        "transformers",
        "diffusers",
        "gradio",
    ]
    for package in packages:
        try:
            __import__(package)
        except ImportError:
            # Use the current interpreter's pip so the install targets this environment
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Install dependencies
install_dependencies()
# Import dependencies
from groq import Groq
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import gradio as gr
# Validate GROQ_API_KEY environment variable
def validate_groq_api_key():
    # Never hard-code API keys; fail fast if the secret is not configured
    if not os.environ.get("GROQ_API_KEY"):
        raise RuntimeError(
            "GROQ_API_KEY is not set. Configure it as a secret/environment variable."
        )

# Initialize Groq Client
validate_groq_api_key()
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
# Example Groq Usage
def fetch_groq_completion(prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content
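# Example use (requires a valid GROQ_API_KEY; the prompt below is illustrative):
#   fetch_groq_completion("Describe a sunset over the ocean in one paragraph")
#   -> returns the model's reply as a plain string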
# Text Understanding with Hugging Face Transformers
def extract_key_entities(text):
    # aggregation_strategy="simple" merges sub-word tokens into whole entities,
    # so the returned words are usable as image prompts
    ner_pipeline = pipeline(
        "ner",
        model="dbmdz/bert-large-cased-finetuned-conll03-english",
        aggregation_strategy="simple",
    )
    entities = ner_pipeline(text)
    return [entity["word"] for entity in entities]

def summarize_text(text):
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(text, max_length=50, min_length=10, do_sample=False)
    return summary[0]["summary_text"]
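# Illustrative output (actual results depend on the models and input text):
#   extract_key_entities("Alice walked through Paris at dawn")
#   -> ["Alice", "Paris"]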
# Frame Generation using Stable Diffusion
def generate_frames(prompts, output_dir="frames"):
    os.makedirs(output_dir, exist_ok=True)
    model_id = "CompVis/stable-diffusion-v1-4"
    # Note: the pipeline is reloaded on every call; cache it if latency matters
    sd_pipeline = StableDiffusionPipeline.from_pretrained(model_id)
    frames = []
    for i, prompt in enumerate(prompts):
        image = sd_pipeline(prompt).images[0]
        frame_path = os.path.join(output_dir, f"frame_{i:04d}.png")
        image.save(frame_path)
        frames.append(frame_path)
    return frames
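# Sketch of standalone use (paths and prompt are illustrative):
#   generate_frames(["a castle at sunset, cinematic"], output_dir="frames")
#   -> ["frames/frame_0000.png"]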
# Video Stitching with FFmpeg
def create_video_from_frames(frames_dir, output_video="output.mp4", fps=24):
    # Frames must be numbered frame_0000.png, frame_0001.png, ... to match this pattern
    frame_pattern = os.path.join(frames_dir, "frame_%04d.png")
    command = [
        "ffmpeg", "-y", "-framerate", str(fps), "-i", frame_pattern,
        "-c:v", "libx264", "-pix_fmt", "yuv420p", output_video,
    ]
    subprocess.run(command, check=True)
    return output_video
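# Equivalent shell invocation (with the defaults above):
#   ffmpeg -y -framerate 24 -i frames/frame_%04d.png -c:v libx264 -pix_fmt yuv420p output.mp4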
# Gradio Interface for Final Output
def generate_video_interface(prompt):
    # Step 1: Fetch understanding from Groq
    groq_response = fetch_groq_completion(prompt)
    # Step 2: Extract entities and summarize
    key_entities = extract_key_entities(groq_response)
    summary = summarize_text(groq_response)
    # Step 3: Generate frames (fall back to the summary if no entities were
    # found, otherwise FFmpeg would fail with zero input frames)
    prompts = [f"{entity}, cinematic, ultra-realistic" for entity in key_entities]
    if not prompts:
        prompts = [f"{summary}, cinematic, ultra-realistic"]
    frame_dir = "frames"
    generate_frames(prompts, output_dir=frame_dir)
    # Step 4: Create video
    video_path = create_video_from_frames(frame_dir)
    return video_path
# Launch Gradio App
def gradio_ui():
    interface = gr.Interface(
        fn=generate_video_interface,
        inputs="text",
        outputs="video",
        title="Text-to-Video Generator",
        description="Generate videos from text descriptions using open-source AI tools.",
    )
    interface.launch()

if __name__ == "__main__":
    gradio_ui()