# Source: Hugging Face Space "app.py" by NeuralFalcon (commit dbac26d, ~12.9 kB)
import gradio as gr
from deep_translator import GoogleTranslator
import os
import shutil
import subprocess
from PIL import Image, ImageDraw, ImageFont
import re
import uuid
# Unpack the bundled font archive so the per-language .ttf files are
# available under ./fonts/ (paths built by get_font_for_language below).
zip_file = "./fonts.zip"
extract_to = "./fonts/"
shutil.unpack_archive(zip_file, extract_to, 'zip')
# Generate a unique, filesystem-safe output path for a generated video.
def tts_file_name(text, folder=None):
    """Build a unique .mp4 path whose name is derived from *text*.

    Parameters
    ----------
    text : str
        Arbitrary input text; only ASCII letters and spaces are kept.
    folder : str, optional
        Destination directory. Defaults to the module-level ``temp_folder``.

    Returns
    -------
    str
        ``<folder>/<slug>_<RANDOM8>.mp4`` where the slug is at most 20
        characters (possibly empty when *text* contains no letters).
    """
    if folder is None:
        folder = temp_folder  # module-level default save directory
    # Keep only letters/spaces, then normalise to a snake_case slug.
    slug = re.sub(r'[^a-zA-Z\s]', '', text).lower().strip().replace(" ", "_")
    # Slicing handles the empty and over-long cases uniformly (the
    # original's three-way ternary reduced to exactly this).
    slug = slug[:20]
    # Short random hex suffix guarantees uniqueness across repeated inputs.
    random_string = uuid.uuid4().hex[:8].upper()
    return f"{folder}/{slug}_{random_string}.mp4"
# Directory where finished videos are written; created eagerly so
# tts_file_name() can assume it exists at call time.
temp_folder="./save_video"
os.makedirs(temp_folder,exist_ok=True)
# Translate text into the target language, falling back to the input.
def translate_text(text, target_language):
    """Return *text* translated via Google Translate (auto-detected source).

    Translation is strictly best-effort: any failure (network error,
    unsupported language code, missing backend) is logged and the
    original text is returned unchanged.
    """
    try:
        result = GoogleTranslator(source='auto', target=target_language).translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        return text
    return result
# Language-code -> display-name tables that decide which translations a
# video cycles through. Keys are Google Translate language codes.
ForeignLanguages = {
    "en": "English", "zh-CN": "Mandarin Chinese", "hi": "Hindi", "es": "Spanish",
    "fr": "French", "ar": "Standard Arabic", "bn": "Bengali", "pt": "Portuguese",
    "ru": "Russian", "ur": "Urdu", "id": "Indonesian", "de": "German", "ja": "Japanese",
    "pa": "Punjabi", "te": "Telugu", "tr": "Turkish", "ta": "Tamil", "vi": "Vietnamese", "ko": "Korean"
}
# Indian-language preset (English included as the source language).
LocalIndianLanguages = {
    "en": "English","hi": "Hindi", "bn": "Bengali", "mr": "Marathi", "te": "Telugu", "ta": "Tamil",
    "gu": "Gujarati", "ur": "Urdu", "kn": "Kannada", "or": "Odia", "pa": "Punjabi", "ml": "Malayalam",
    "mai": "Maithili","ne": "Nepali","sa": "Sanskrit","doi": "Dogri","sd": "Sindhi"
}
# Font Mapping: font filename (inside ./fonts/) -> language codes whose
# script that font can render. Looked up by get_font_for_language().
font_mapping = {
    "Arial-Bold.TTF": [ "it", "pt", "tr", "id"],
    "OpenSans-Bold.ttf": ["ru", "vi"],
    "Poppins-Bold.ttf": ["en","es","fr","hi","de"],
    "NotoSansJP-Bold.ttf":["ja"],
    "NotoSansBengali-Bold.ttf":["bn"],
    "NotoSansKR-Bold.ttf":["ko"],
    "NotoSansTamil-Bold.ttf":["ta"],
    "AnekTelugu-Bold.ttf":["te"],
    "BraahOne-Regular.ttf":["pa"],
    "NotoNastaliqUrdu-Bold.ttf":["ur"],
    "NotoSansSC-Bold.ttf":["zh-CN"],
    "NotoSansArabic-Bold.ttf":["ar","sd"],
    "NotoSansGujarati-Bold.ttf":["gu"],
    "NotoSansKannada-Bold.ttf":["kn"],
    "AnekOdia-Bold.ttf":["or"],
    "NotoSansMalayalam-Bold.ttf":["ml"],
    "NotoSans-Bold.ttf":["mr"]
}
# Resolve the bundled font file that can render a given language's script.
def get_font_for_language(language, mapping=None):
    """Return the path of a font under ./fonts/ that supports *language*.

    Parameters
    ----------
    language : str
        Google Translate language code (e.g. "ja", "zh-CN").
    mapping : dict, optional
        ``{font_filename: [language_codes]}`` table; defaults to the
        module-level ``font_mapping``.

    Returns
    -------
    str
        ``./fonts/<font>`` for the first matching entry, falling back to
        NotoSans-Bold.ttf when the language has no dedicated font.
    """
    if mapping is None:
        mapping = font_mapping
    for font, languages in mapping.items():
        if language in languages:
            return f"./fonts/{font}"
    return "./fonts/NotoSans-Bold.ttf"
# Render one centred-text frame and save it as a PNG.
def create_image(text, language="en", font_size=100, text_color="#000000", bg_color="#FFFFFF", width=1024, height=1024, output_folder="./"):
    """Draw *text* centred on a solid-colour canvas and save it.

    The font is picked per language via get_font_for_language(); the
    frame is written to ``<output_folder>/frame_<language>.png`` and the
    saved path is returned.
    """
    canvas = Image.new('RGB', (width, height), color=bg_color)
    pen = ImageDraw.Draw(canvas)
    font_path = get_font_for_language(language)
    print(font_path)  # debug: show which font was selected
    font = ImageFont.truetype(font_path, font_size)
    # Measure the rendered text so it can be centred on the canvas.
    left, top, right, bottom = pen.textbbox((0, 0), text, font=font)
    text_w = right - left
    text_h = bottom - top
    origin = ((width - text_w) // 2, (height - text_h) // 2)
    pen.text(origin, text, fill=text_color, font=font)
    filename = os.path.join(output_folder, f"frame_{language}.png")
    canvas.save(filename)
    return filename
import subprocess
def get_video_duration(video_file):
    """Return the duration of *video_file* in seconds.

    Runs ``ffmpeg -i`` (which prints media metadata to stderr) and
    parses the ``Duration: HH:MM:SS.ss`` line.

    Raises
    ------
    ValueError
        If no duration line can be found in ffmpeg's output.
    """
    probe = subprocess.run(
        ['ffmpeg', '-i', video_file],
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE
    )
    # ffmpeg writes stream information to stderr, not stdout.
    info = probe.stderr.decode()
    found = re.search(r'Duration: (\d+):(\d+):(\d+\.\d+)', info)
    if not found:
        raise ValueError("Could not extract video duration.")
    hours = int(found.group(1))
    minutes = int(found.group(2))
    seconds = float(found.group(3))
    return hours * 3600 + minutes * 60 + seconds
import json
# Generate video function
def _reset_dir(path):
    """Delete *path* (if present) and recreate it empty."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def _write_frames(img_path, frames_dir, start_index, count):
    """Copy one rendered image into *count* sequentially numbered frame
    files under *frames_dir*; return the next free frame index."""
    for _ in range(count):
        shutil.copy(img_path, os.path.join(frames_dir, f"{start_index:05d}.png"))
        start_index += 1
    return start_index


def generate_video(input_text, language_set, font_size, theme, canvas_size, llm_translation=None):
    """Render *input_text* in many languages as a slideshow-style video.

    Parameters
    ----------
    input_text : str
        The word/phrase to translate and display.
    language_set : str
        "Foreign Languages" or "Local Indian Languages"; ignored when
        *llm_translation* is supplied.
    font_size : int
        Font size in points for the rendered frames.
    theme : str
        "Black Background" or "White Background".
    canvas_size : str
        "<width>x<height>" string, e.g. "1024x1024".
    llm_translation : str, optional
        JSON object string mapping language codes to pre-translated
        text; when given it replaces on-the-fly Google translation.

    Returns
    -------
    tuple[str, str]
        The final video path, duplicated because the UI binds the result
        to both a File and a Video component.
    """
    width, height = map(int, canvas_size.split('x'))
    theme_colors = {"Black Background": ("#FFFFFF", "#000000"), "White Background": ("#000000", "#FFFFFF")}
    text_color, bg_color = theme_colors[theme]

    output_folder = "temp_frames"  # per-language rendered images
    frames = "./frames"            # numbered frame sequence fed to ffmpeg
    _reset_dir(output_folder)
    _reset_dir(frames)

    FPS = 30
    DURATION_PER_IMAGE = 0.5  # seconds each language stays on screen
    FRAMES_PER_IMAGE = round(FPS * DURATION_PER_IMAGE)
    # The last language lingers longer so the music fade-out has room.
    extra_frames = FRAMES_PER_IMAGE + 10

    # Build the ordered (language, text) pairs to render — either from
    # the user-supplied LLM JSON or by translating on the fly.
    if llm_translation:
        data = json.loads(llm_translation)
        pairs = list(data.items())
    else:
        language_list = list(ForeignLanguages.keys()) if language_set == "Foreign Languages" else list(LocalIndianLanguages.keys())
        pairs = [(lang, translate_text(input_text, lang) if lang != 'en' else input_text)
                 for lang in language_list]

    frame_index = 0
    for i, (lang, translated_text) in enumerate(pairs):
        img_path = create_image(translated_text, lang, font_size, text_color, bg_color, width, height, output_folder)
        is_last = i == len(pairs) - 1
        frame_count = FRAMES_PER_IMAGE + extra_frames if is_last else FRAMES_PER_IMAGE
        frame_index = _write_frames(img_path, frames, frame_index, frame_count)

    # Assemble the silent video from the numbered frames.
    output_video = "multi_language_video.mp4"
    subprocess.run([
        "ffmpeg", "-y", "-r", str(FPS), "-i", f"{frames}/%05d.png",
        "-c:v", "libx264", "-pix_fmt", "yuv420p", output_video
    ])

    # Mux in background music with a 1-second fade-out, when available.
    music_file = "./music.WAV"  # path to the optional music track
    if not os.path.exists(music_file):
        return output_video, output_video
    final_video = tts_file_name(input_text)
    video_duration = get_video_duration(output_video)
    # Start the fade 1 s before the end (clamped for very short videos).
    fade_out_start = max(0, video_duration - 1)
    subprocess.run([
        "ffmpeg", "-y", "-i", output_video, "-i", music_file,
        "-c:v", "libx264", "-c:a", "aac", "-strict", "experimental",
        "-af", f"afade=t=out:st={fade_out_start}:d=1",
        "-shortest", final_video
    ])
    # Clean up intermediate images and frames.
    shutil.rmtree(output_folder)
    shutil.rmtree(frames)
    return final_video, final_video
# Gradio UI Setup
def ui():
    """Build the main "Video Generation" tab.

    Wires the name/style inputs to generate_video() and exposes the
    result both as a downloadable file and an inline video player.
    Returns the gr.Blocks instance for mounting in a TabbedInterface.
    """
    dummy_examples = [
        ["Hello", "Foreign Languages"],
        ["No", "Local Indian Languages"]
    ]
    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 40px;'>See Your Name in Different Languages</h1></center>")  # Larger title with CSS
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label='📝 Enter Your Name', lines=3)
                language_set = gr.Radio(
                    ["Foreign Languages", "Local Indian Languages"],
                    label="🌍Language Set",
                    value="Foreign Languages"
                )
                canvas_size = gr.Radio(
                    ["1920x1080", "1080x1920", "1024x1024"],
                    value="1024x1024",
                    label="🖥️ Canvas Size"
                )
                generate_btn = gr.Button('🚀 Generate', variant='primary')
                with gr.Accordion('🖌️ Text Style', open=False):
                    font_size = gr.Slider(20, 200, value=100, step=1, label="🔠 Font Size")
                    theme = gr.Radio(["Black Background", "White Background"], label="🎨 Theme", value="Black Background")
                    # Optional pre-made JSON translations (see the "LLM Prompt" tab).
                    llm_translation = gr.Textbox(label='📝 LLM Translation', lines=5, placeholder="Enter a word...",value=None)
            with gr.Column():
                download_video = gr.File(label="📥 Download Video")
                play_video = gr.Video(label="🎬 Generated Video")
        # Define Inputs and Outputs
        input_list = [input_text, language_set, font_size, theme, canvas_size,llm_translation]
        output_list = [download_video, play_video]
        # Bind Functions to UI Elements (Enter key and button both trigger).
        input_text.submit(generate_video, inputs=input_list, outputs=output_list)
        generate_btn.click(generate_video, inputs=input_list, outputs=output_list)
        # Add examples
        gr.Examples(examples=dummy_examples, inputs=[input_text, language_set])
    return demo
def prompt_maker(text, language_set="Foreign Languages"):
    """Compose an LLM prompt asking for *text* translated into every
    language of the chosen set, answered as a bare JSON object with
    empty-string placeholders for the model to fill in."""
    foreign = {
        "en": "English", "zh-CN": "Mandarin Chinese", "hi": "Hindi", "es": "Spanish",
        "fr": "French", "ar": "Standard Arabic", "bn": "Bengali", "pt": "Portuguese",
        "ru": "Russian", "ur": "Urdu", "id": "Indonesian", "de": "German", "ja": "Japanese",
        "pa": "Punjabi", "te": "Telugu", "tr": "Turkish", "ta": "Tamil", "vi": "Vietnamese", "ko": "Korean"
    }
    local_indian = {
        "en": "English","hi": "Hindi", "bn": "Bengali", "mr": "Marathi", "te": "Telugu", "ta": "Tamil",
        "gu": "Gujarati", "ur": "Urdu", "kn": "Kannada", "or": "Odia", "pa": "Punjabi", "ml": "Malayalam",
        "mai": "Maithili","ne": "Nepali","sa": "Sanskrit","doi": "Dogri","sd": "Sindhi"
    }
    sets = {"Foreign Languages": foreign, "Local Indian Languages": local_indian}
    # Unknown set names silently fall back to the foreign preset.
    selected_languages = sets.get(language_set, foreign)
    body = ",\n".join(f' "{code}": ""' for code in selected_languages)
    header = f'Translate this English word "{text}" to different languages in JSON format. Only write the JSON text:\n'
    return header + "{\n" + body + "\n}"
def ui2():
    """Build the "LLM Prompt" tab.

    Turns a name plus a language-set choice into a copy-paste prompt
    (via prompt_maker) asking an LLM for JSON translations that can be
    fed back into the video tab. Returns the gr.Blocks instance.
    """
    with gr.Blocks() as prompt_tab:
        gr.Markdown("## 🌍 LLM Translation")
        with gr.Row():
            with gr.Column():
                name_box = gr.Textbox(label='📝 Enter Your Name', lines=1, placeholder="Enter Your Name ...")
                set_choice = gr.Radio(
                    ["Foreign Languages", "Local Indian Languages"],
                    label="🌍 Language Set",
                    value="Foreign Languages"
                )
                make_btn = gr.Button('🚀 Generate', variant='primary')
            with gr.Column():
                prompt_box = gr.Textbox(label='📝 Generated Prompt', lines=5)
        make_btn.click(prompt_maker, inputs=[name_box, set_choice], outputs=prompt_box)
    return prompt_tab
# Launch the app
def main(share=False, debug=True):
    """Assemble both tabs into a TabbedInterface and launch the server.

    Parameters
    ----------
    share : bool
        Forwarded to Gradio — create a public share link when True.
    debug : bool
        Forwarded to Gradio — keep the process attached with debug logs.
    """
    video_tab = ui()
    prompt_tab = ui2()
    app = gr.TabbedInterface([video_tab, prompt_tab], ["Video Generation", "LLM Prompt"])
    app.queue().launch(debug=debug, share=share)


if __name__ == "__main__":
    # Guarded so importing this module (e.g. from tests) does not start
    # the server; running it as a script still launches immediately.
    main()