File size: 7,582 Bytes
e793ef5 eea784a c7ac97c 219f1d3 9d68248 1acdf97 d5c151e 9c80aa8 1acdf97 d5c151e 9d68248 219f1d3 9d68248 9c80aa8 9d68248 9c80aa8 9d68248 2b22edd a6cf7f8 9f53b30 9d68248 2b22edd 1acdf97 2b22edd 9d68248 9f53b30 9d68248 9f53b30 9d68248 9f53b30 9d68248 9c80aa8 9d68248 dd9a5ec 9d68248 07652a2 9d68248 bbd65e5 9d68248 219f1d3 9d68248 dd9a5ec 50824cf 20bf5db 50824cf 9d68248 219f1d3 9d68248 eea784a 9d68248 9f53b30 c7ac97c 9f53b30 82ea3c8 9d68248 9f53b30 eea784a 9d68248 eea784a 9f53b30 eea784a 25a5386 eea784a c7ac97c eea784a a8a7a99 25a5386 eea784a e793ef5 9d68248 eea784a 9d68248 |
|
import gradio as gr
from datetime import datetime
import random
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
from moviepy import (
ImageClip,
VideoFileClip,
TextClip,
CompositeVideoClip,
AudioFileClip,
concatenate_videoclips
)
import speech_recognition as sr
import json
from nltk.tokenize import sent_tokenize
import logging
from textblob import TextBlob
import whisper
# Configure the root logger once at import time: DEBUG level, with each record
# formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def split_into_sentences(text):
    """Split *text* into sentences with TextBlob.

    Args:
        text: The text to segment.

    Returns:
        A list with one plain ``str`` per sentence found by TextBlob.
    """
    sentences = TextBlob(text).sentences
    return list(map(str, sentences))
def transcribe_video(video_path, model_name="base"):
    """Transcribe the audio track of a video with Whisper.

    The audio is extracted into a private temporary directory (removed when
    transcription finishes) so that concurrent calls do not overwrite each
    other's intermediate file.

    Args:
        video_path: Path of the video file to transcribe.
        model_name: Name of the Whisper model to load (for example "tiny",
            "base", "small", "medium" or "large"). Defaults to "base".

    Returns:
        A list of dicts, one per Whisper segment, each with the keys
        "start", "end" and "text".

    Raises:
        ValueError: If the video has no audio track.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as workdir:
        audio_path = os.path.join(workdir, "audio.wav")

        # Extract the audio, always releasing the video reader afterwards.
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError(f"No audio track found in {video_path!r}")
            video.audio.write_audiofile(audio_path)
        finally:
            video.close()

        # Transcribe while the temporary audio file still exists.
        model = whisper.load_model(model_name)
        result = model.transcribe(audio_path, word_timestamps=True)

    # Keep only the segment-level timing and text.
    return [
        {
            "start": segment["start"],
            "end": segment["end"],
            "text": segment["text"],
        }
        for segment in result["segments"]
    ]
# Function to get the appropriate translation model based on target language
def get_translation_model(target_language):
    """Return the translation model id for *target_language*.

    Args:
        target_language: Target language code ("es", "fr" or "zh").

    Returns:
        The matching "Helsinki-NLP/opus-mt-en-<code>" model id, or the
        English-to-French model id when the code is not in the table.
    """
    fallback_model = "Helsinki-NLP/opus-mt-en-fr"
    # Every supported target uses the same English-source naming scheme.
    models_by_language = {
        code: f"Helsinki-NLP/opus-mt-en-{code}" for code in ("es", "fr", "zh")
    }
    try:
        return models_by_language[target_language]
    except KeyError:
        return fallback_model
def translate_text(transcription_json, target_language):
    """Translate every transcript segment into *target_language*.

    Args:
        transcription_json: List of dicts with the keys "start", "end" and
            "text", one per transcript segment.
        target_language: Target language code (for example "es", "fr", "zh").
            "en" keeps the original text, because every configured model
            translates *from* English.

    Returns:
        A list of dicts (not a JSON string) with the keys "start",
        "original", "translated" and "end", in the same order as the input.
    """
    # Nothing to translate: avoid loading a translation model at all.
    if not transcription_json:
        return []

    if target_language == "en":
        # Without this, "en" would fall back to the English-to-French model.
        translator = None
    else:
        translation_model_id = get_translation_model(target_language)
        logger.debug("Translation model: %s", translation_model_id)
        translator = pipeline("translation", model=translation_model_id)

    translated_json = []
    for entry in transcription_json:
        original_text = entry["text"]
        if translator is None or not original_text.strip():
            # Pass-through for English targets and for blank segments.
            translated_text = original_text
        else:
            translated_text = translator(original_text)[0]["translation_text"]

        translated_json.append({
            "start": entry["start"],
            "original": original_text,
            "translated": translated_text,
            "end": entry["end"],
        })
        logger.debug(
            "Adding to translated_json: start=%s, original=%s, translated=%s, end=%s",
            entry["start"], original_text, translated_text, entry["end"],
        )

    return translated_json
def add_transcript_to_video(video_path, translated_json, output_path, font=None):
    """Overlay translated captions on a video and write the result to disk.

    Uses the MoviePy 2.x API (``with_*`` methods and keyword ``text=``),
    matching the ``from moviepy import ...`` import style used by this file.

    Args:
        video_path: Path of the source video.
        translated_json: List of dicts, each with the keys "start", "end"
            and "translated".
        output_path: Path of the video file to write.
        font: Optional font passed through to ``TextClip``. ``None`` leaves
            the choice to MoviePy.
            NOTE(review): early 2.0 releases may require an explicit font
            path -- confirm against the pinned MoviePy version.

    Returns:
        ``output_path``, for the caller's convenience.

    Raises:
        ValueError: If an entry is not a dict containing "translated".
    """
    video = VideoFileClip(video_path)
    text_clips = []
    final_video = None
    logger.debug("Full translated_json: %s", translated_json)
    try:
        for entry in translated_json:
            logger.debug("Processing entry: %s", entry)
            if not (isinstance(entry, dict) and "translated" in entry):
                raise ValueError(f"Invalid entry format: {entry}")

            # NOTE(review): size=video.size makes each caption a full-frame
            # clip; kept as in the original -- confirm this is intended.
            caption = (
                TextClip(
                    font=font,
                    text=entry["translated"],
                    method="caption",
                    color="white",
                    bg_color="black",
                    size=video.size,
                )
                .with_start(entry["start"])
                .with_duration(entry["end"] - entry["start"])
                .with_position("bottom")
                .with_opacity(0.7)
            )
            text_clips.append(caption)

        # Overlay all captions on the original video and encode the result.
        final_video = CompositeVideoClip([video] + text_clips)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Release readers and file handles even when rendering fails.
        if final_video is not None:
            final_video.close()
        for clip in text_clips:
            clip.close()
        video.close()

    return output_path
# Mock functions for platform actions and analytics
def mock_post_to_platform(platform, content_title):
    """Return a fake confirmation message for posting content to a platform.

    Args:
        platform: Name of the platform the content is "posted" to.
        content_title: Title of the content.

    Returns:
        The confirmation message; nothing is actually posted.
    """
    template = "Content '{}' successfully posted on {}!"
    return template.format(content_title, platform)
def mock_analytics():
    """Return randomly generated analytics for YouTube and Instagram.

    Returns:
        A dict keyed by platform name; each value holds an integer "Views"
        count and an "Engagement Rate" string with two decimals and a
        trailing "%".
    """
    # Random draws happen in the same order as the entries are listed.
    youtube_views = random.randint(1000, 5000)
    youtube_rate = random.uniform(5, 15)
    instagram_views = random.randint(500, 3000)
    instagram_rate = random.uniform(10, 20)

    report = {}
    report["YouTube"] = {
        "Views": youtube_views,
        "Engagement Rate": format(youtube_rate, ".2f") + "%",
    }
    report["Instagram"] = {
        "Views": instagram_views,
        "Engagement Rate": format(instagram_rate, ".2f") + "%",
    }
    return report
# Core functionalities
def upload_and_manage(file, platform, language):
    """Transcribe, translate and caption an uploaded file, then mock-post it.

    Args:
        file: The uploaded file as delivered by the upload component: either
            an object exposing the path as ``.name`` or a plain path string.
            ``None`` when nothing was uploaded.
        platform: Name of the platform to "post" to.
        language: Target language code for the captions.

    Returns:
        A 4-tuple ``(status message, transcription, translation,
        processed video path)``. The last three items are ``None`` when no
        file was uploaded. The fourth item is a file path because the
        interface routes it to a file-download component.
    """
    if file is None:
        return "Please upload a video/audio file.", None, None, None

    # Accept both file-like objects with a .name path and plain path strings.
    media_path = getattr(file, "name", file)
    output_video_path = "output_video.mp4"

    # Transcribe the uploaded media and translate each timed segment.
    transcription_json = transcribe_video(media_path)
    translated_json = translate_text(transcription_json, language)

    # Render the translated captions onto the video.
    add_transcript_to_video(media_path, translated_json, output_video_path)

    # Mock posting action (no real platform is contacted).
    post_message = mock_post_to_platform(platform, media_path)

    return post_message, transcription_json, translated_json, output_video_path
def generate_dashboard(analytics):
    """Render analytics as a plain-text dashboard.

    Args:
        analytics: Mapping of platform name to a mapping of metric name to
            value; may be empty or ``None``.

    Returns:
        "No analytics available." when *analytics* is falsy; otherwise a
        header line followed by one section per platform listing its metrics.
    """
    if not analytics:
        return "No analytics available."

    pieces = ["Platform Analytics:\n"]
    for platform, data in analytics.items():
        pieces.append(f"\n{platform}:\n")
        pieces.extend(f" {metric}: {value}\n" for metric, value in data.items())
    return "".join(pieces)
# Gradio Interface with Tabs
def build_interface():
    """Build the two-tab Gradio interface.

    The "Content Management" tab uploads a file and runs
    ``upload_and_manage``; the "Analytics Dashboard" tab renders a text
    dashboard from freshly generated mock analytics.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """

    def _render_dashboard():
        # The original wiring passed an unpopulated gr.State(), so the
        # dashboard always received None; generate the mock data here instead.
        return generate_dashboard(mock_analytics())

    with gr.Blocks() as demo:
        with gr.Tab("Content Management"):
            gr.Markdown("## Integrated Content Management")
            with gr.Row():
                file_input = gr.File(label="Upload Video/Audio File")
                platform_input = gr.Dropdown(["YouTube", "Instagram"], label="Select Platform")
                language_input = gr.Dropdown(["en", "es", "fr", "zh"], label="Select Language")  # Language codes
            submit_button = gr.Button("Post and Process")
            with gr.Row():
                post_output = gr.Textbox(label="Posting Status", interactive=False)
                transcription_output = gr.JSON(label="Transcription JSON File")
                translated_output = gr.JSON(label="Translated JSON File")
            with gr.Row():
                processed_video_output = gr.File(label="Download Processed Video", interactive=False)  # Download button
            submit_button.click(
                upload_and_manage,
                inputs=[file_input, platform_input, language_input],
                outputs=[post_output, transcription_output, translated_output, processed_video_output],
            )
        with gr.Tab("Analytics Dashboard"):
            gr.Markdown("## Content Performance Analytics")
            analytics_output = gr.Textbox(label="Dashboard", interactive=False)
            generate_dashboard_button = gr.Button("Generate Dashboard")
            generate_dashboard_button.click(_render_dashboard, inputs=None, outputs=[analytics_output])
    return demo
# Build the interface at import time so `demo` stays available as a module attribute.
demo = build_interface()

# Launch the Gradio interface only when this file is executed as a script,
# so importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()