Update app.py
Browse files
app.py
CHANGED
@@ -47,13 +47,6 @@ def transcribe_video(video_path):
|
|
47 |
|
48 |
return timestamps
|
49 |
|
50 |
-
def save_transcript_to_json(timestamps, json_file):
|
51 |
-
with open(json_file, 'w') as f:
|
52 |
-
json.dump(timestamps, f, indent=4)
|
53 |
-
|
54 |
-
# Initialize the translation pipeline
|
55 |
-
translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-{target_language}")
|
56 |
-
|
57 |
# Function to get the appropriate translation model based on target language
|
58 |
def get_translation_model(target_language):
|
59 |
# Map of target languages to their corresponding model names
|
@@ -65,30 +58,27 @@ def get_translation_model(target_language):
|
|
65 |
}
|
66 |
return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr") # Default to French if not found
|
67 |
|
68 |
-
def translate_text(
|
69 |
# Load the translation model for the specified target language
|
70 |
translation_model_id = get_translation_model(target_language)
|
71 |
logger.debug(f"Translation model: {translation_model_id}")
|
72 |
translator = pipeline("translation", model=translation_model_id)
|
73 |
|
74 |
-
# Parse the input JSON
|
75 |
-
timestamps = json.loads(timestamps_json)
|
76 |
-
|
77 |
# Prepare output structure
|
78 |
-
|
79 |
|
80 |
# Translate each sentence and store it with its start time
|
81 |
-
for entry in
|
82 |
original_text = entry["text"]
|
83 |
translated_text = translator(original_text)[0]['translation_text']
|
84 |
-
|
85 |
"start": entry["start"],
|
86 |
"original": original_text,
|
87 |
"translated": translated_text
|
88 |
})
|
89 |
|
90 |
# Return the translated timestamps as a JSON string
|
91 |
-
return json.dumps(
|
92 |
|
93 |
def add_transcript_to_video(video_path, timestamps, output_path):
|
94 |
# Load the video file
|
@@ -130,19 +120,15 @@ def upload_and_manage(file, platform, language):
|
|
130 |
|
131 |
# Define paths for audio and output files
|
132 |
audio_path = "audio.wav"
|
133 |
-
json_file = "transcript.json"
|
134 |
output_video_path = "output_video.mp4"
|
135 |
|
136 |
# Transcribe audio from uploaded media file and get timestamps
|
137 |
-
|
138 |
-
|
139 |
-
# Save transcript to JSON
|
140 |
-
save_transcript_to_json(timestamps, json_file)
|
141 |
|
142 |
-
|
143 |
|
144 |
# Add transcript to video based on timestamps
|
145 |
-
add_transcript_to_video(file.name,
|
146 |
|
147 |
# Mock posting action (you can implement this as needed)
|
148 |
post_message = mock_post_to_platform(platform, file.name)
|
@@ -150,7 +136,7 @@ def upload_and_manage(file, platform, language):
|
|
150 |
# Mock analytics generation
|
151 |
analytics = mock_analytics()
|
152 |
|
153 |
-
return post_message,
|
154 |
|
155 |
def generate_dashboard(analytics):
|
156 |
if not analytics:
|
@@ -177,8 +163,8 @@ def build_interface():
|
|
177 |
|
178 |
with gr.Row():
|
179 |
post_output = gr.Textbox(label="Posting Status", interactive=False)
|
180 |
-
transcription_output = gr.Textbox(label="Transcription
|
181 |
-
|
182 |
|
183 |
submit_button.click(upload_and_manage,
|
184 |
inputs=[file_input, platform_input, language_input],
|
|
|
47 |
|
48 |
return timestamps
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# Function to get the appropriate translation model based on target language
|
51 |
def get_translation_model(target_language):
|
52 |
# Map of target languages to their corresponding model names
|
|
|
58 |
}
|
59 |
return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr") # Default to French if not found
|
60 |
|
61 |
+
def translate_text(transcription_json, target_language):
    """Translate every transcript entry and return the result as a JSON string.

    Args:
        transcription_json: Iterable of mappings; each one is read through
            its ``"text"`` and ``"start"`` keys.
        target_language: Value handed to ``get_translation_model`` to pick
            the translation model id.

    Returns:
        A JSON array (indented by 4 spaces) of objects with the keys
        ``"start"``, ``"original"`` and ``"translated"``. An empty or
        missing transcript yields ``"[]"``.
    """
    # Nothing to translate: return early and skip loading the model.
    if not transcription_json:
        return json.dumps([], indent=4)

    # Load the translation model for the specified target language
    translation_model_id = get_translation_model(target_language)
    logger.debug("Translation model: %s", translation_model_id)
    translator = pipeline("translation", model=translation_model_id)

    # Translate each sentence and store it with its start time
    translated_json = []
    for entry in transcription_json:
        original_text = entry["text"]
        translated_text = translator(original_text)[0]["translation_text"]
        translated_json.append(
            {
                "start": entry["start"],
                "original": original_text,
                "translated": translated_text,
            }
        )

    # ensure_ascii=False keeps translated non-ASCII characters readable
    # instead of emitting \uXXXX escapes; the output is still valid JSON.
    return json.dumps(translated_json, indent=4, ensure_ascii=False)
|
82 |
|
83 |
def add_transcript_to_video(video_path, timestamps, output_path):
|
84 |
# Load the video file
|
|
|
120 |
|
121 |
# Define paths for audio and output files
|
122 |
audio_path = "audio.wav"
|
|
|
123 |
output_video_path = "output_video.mp4"
|
124 |
|
125 |
# Transcribe audio from uploaded media file and get timestamps
|
126 |
+
transcrption_json = transcribe_video(file.name)
|
|
|
|
|
|
|
127 |
|
128 |
+
translated_json = translate_text(transcrption_json, language)
|
129 |
|
130 |
# Add transcript to video based on timestamps
|
131 |
+
add_transcript_to_video(file.name, translated_json, output_video_path)
|
132 |
|
133 |
# Mock posting action (you can implement this as needed)
|
134 |
post_message = mock_post_to_platform(platform, file.name)
|
|
|
136 |
# Mock analytics generation
|
137 |
analytics = mock_analytics()
|
138 |
|
139 |
+
return post_message, transcrption_json, translated_json, analytics
|
140 |
|
141 |
def generate_dashboard(analytics):
|
142 |
if not analytics:
|
|
|
163 |
|
164 |
with gr.Row():
|
165 |
post_output = gr.Textbox(label="Posting Status", interactive=False)
|
166 |
+
transcription_output = gr.Textbox(label="Transcription JSON File", interactive=False)
|
167 |
+
translated_output = gr.Textbox(label="Translated JSON File", interactive=False)
|
168 |
|
169 |
submit_button.click(upload_and_manage,
|
170 |
inputs=[file_input, platform_input, language_input],
|