qqwjq1981 committed on
Commit
9f53b30
·
verified ·
1 Parent(s): 82ea3c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -25
app.py CHANGED
@@ -47,13 +47,6 @@ def transcribe_video(video_path):
47
 
48
  return timestamps
49
 
50
- def save_transcript_to_json(timestamps, json_file):
51
- with open(json_file, 'w') as f:
52
- json.dump(timestamps, f, indent=4)
53
-
54
- # Initialize the translation pipeline
55
- translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-{target_language}")
56
-
57
  # Function to get the appropriate translation model based on target language
58
  def get_translation_model(target_language):
59
  # Map of target languages to their corresponding model names
@@ -65,30 +58,27 @@ def get_translation_model(target_language):
65
  }
66
  return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr") # Default to French if not found
67
 
68
- def translate_text(timestamps_json, target_language):
69
  # Load the translation model for the specified target language
70
  translation_model_id = get_translation_model(target_language)
71
  logger.debug(f"Translation model: {translation_model_id}")
72
  translator = pipeline("translation", model=translation_model_id)
73
 
74
- # Parse the input JSON
75
- timestamps = json.loads(timestamps_json)
76
-
77
  # Prepare output structure
78
- translated_timestamps = []
79
 
80
  # Translate each sentence and store it with its start time
81
- for entry in timestamps:
82
  original_text = entry["text"]
83
  translated_text = translator(original_text)[0]['translation_text']
84
- translated_timestamps.append({
85
  "start": entry["start"],
86
  "original": original_text,
87
  "translated": translated_text
88
  })
89
 
90
  # Return the translated timestamps as a JSON string
91
- return json.dumps(translated_timestamps, indent=4)
92
 
93
  def add_transcript_to_video(video_path, timestamps, output_path):
94
  # Load the video file
@@ -130,19 +120,15 @@ def upload_and_manage(file, platform, language):
130
 
131
  # Define paths for audio and output files
132
  audio_path = "audio.wav"
133
- json_file = "transcript.json"
134
  output_video_path = "output_video.mp4"
135
 
136
  # Transcribe audio from uploaded media file and get timestamps
137
- timestamps = transcribe_video(file.name)
138
-
139
- # Save transcript to JSON
140
- save_transcript_to_json(timestamps, json_file)
141
 
142
- translated_timestamps = translate_text(timestamps, language)
143
 
144
  # Add transcript to video based on timestamps
145
- add_transcript_to_video(file.name, translated_timestamps, output_video_path)
146
 
147
  # Mock posting action (you can implement this as needed)
148
  post_message = mock_post_to_platform(platform, file.name)
@@ -150,7 +136,7 @@ def upload_and_manage(file, platform, language):
150
  # Mock analytics generation
151
  analytics = mock_analytics()
152
 
153
- return post_message, timestamps, json_file, analytics
154
 
155
  def generate_dashboard(analytics):
156
  if not analytics:
@@ -177,8 +163,8 @@ def build_interface():
177
 
178
  with gr.Row():
179
  post_output = gr.Textbox(label="Posting Status", interactive=False)
180
- transcription_output = gr.Textbox(label="Transcription Timestamps (JSON)", interactive=False)
181
- json_output = gr.Textbox(label="Transcript JSON File", interactive=False)
182
 
183
  submit_button.click(upload_and_manage,
184
  inputs=[file_input, platform_input, language_input],
 
47
 
48
  return timestamps
49
 
 
 
 
 
 
 
 
50
  # Function to get the appropriate translation model based on target language
51
  def get_translation_model(target_language):
52
  # Map of target languages to their corresponding model names
 
58
  }
59
  return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr") # Default to French if not found
60
 
61
+ def translate_text(transcription_json, target_language):
62
  # Load the translation model for the specified target language
63
  translation_model_id = get_translation_model(target_language)
64
  logger.debug(f"Translation model: {translation_model_id}")
65
  translator = pipeline("translation", model=translation_model_id)
66
 
 
 
 
67
  # Prepare output structure
68
+ translated_json = []
69
 
70
  # Translate each sentence and store it with its start time
71
+ for entry in transcription_json:
72
  original_text = entry["text"]
73
  translated_text = translator(original_text)[0]['translation_text']
74
+ translated_json.append({
75
  "start": entry["start"],
76
  "original": original_text,
77
  "translated": translated_text
78
  })
79
 
80
  # Return the translated timestamps as a JSON string
81
+ return json.dumps(translated_json, indent=4)
82
 
83
  def add_transcript_to_video(video_path, timestamps, output_path):
84
  # Load the video file
 
120
 
121
  # Define paths for audio and output files
122
  audio_path = "audio.wav"
 
123
  output_video_path = "output_video.mp4"
124
 
125
  # Transcribe audio from uploaded media file and get timestamps
126
+ transcrption_json = transcribe_video(file.name)
 
 
 
127
 
128
+ translated_json = translate_text(transcrption_json, language)
129
 
130
  # Add transcript to video based on timestamps
131
+ add_transcript_to_video(file.name, translated_json, output_video_path)
132
 
133
  # Mock posting action (you can implement this as needed)
134
  post_message = mock_post_to_platform(platform, file.name)
 
136
  # Mock analytics generation
137
  analytics = mock_analytics()
138
 
139
+ return post_message, transcrption_json, translated_json, analytics
140
 
141
  def generate_dashboard(analytics):
142
  if not analytics:
 
163
 
164
  with gr.Row():
165
  post_output = gr.Textbox(label="Posting Status", interactive=False)
166
+ transcription_output = gr.Textbox(label="Transcription JSON File", interactive=False)
167
+ translated_output = gr.Textbox(label="Translated JSON File", interactive=False)
168
 
169
  submit_button.click(upload_and_manage,
170
  inputs=[file_input, platform_input, language_input],