qqwjq1981 committed
Commit 9d68248 · verified · 1 Parent(s): 7b0ce86

Update app.py

Files changed (1): app.py (+98 -35)
app.py CHANGED
@@ -3,28 +3,41 @@ from datetime import datetime
 import random
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
-
-# Initialize the Whisper pipeline
-whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
-
-def transcribe_audio_from_file(file_path):
-    """
-    Transcribes audio from a local file using the Whisper pipeline.
-
-    Args:
-        file_path (str): Path to the local media file.
-
-    Returns:
-        str: Transcription text if successful, otherwise None.
-    """
-    try:
-        # Transcribe the audio using Whisper
-        transcription = whisper_pipeline(file_path, return_timestamps=True)
-        logger.debug(f"Transcription: {transcription['text']}")
-        return transcription["text"]
-    except Exception as e:
-        logger.error(f"An error occurred during transcription: {e}")
-        return None
+import moviepy.editor as mp
+import speech_recognition as sr
+import json
+from nltk.tokenize import sent_tokenize
+
+def transcribe_video(video_path):
+    # Load the video file and extract audio
+    video = mp.VideoFileClip(video_path)
+    audio_path = "audio.wav"
+    video.audio.write_audiofile(audio_path)
+
+    # Initialize recognizer class (for recognizing the speech)
+    recognizer = sr.Recognizer()
+
+    # Use SpeechRecognition to transcribe audio
+    with sr.AudioFile(audio_path) as source:
+        audio_text = recognizer.record(source)
+        transcript = recognizer.recognize_google(audio_text)
+
+    # Split transcript into sentences
+    sentences = sent_tokenize(transcript)
+
+    # Create a list of timestamps for each sentence
+    timestamps = []
+    duration_per_sentence = len(audio_text.frame_data) / len(sentences) / 44100 # Approximate duration per sentence in seconds
+
+    for i, sentence in enumerate(sentences):
+        start_time = i * duration_per_sentence
+        timestamps.append({"start": start_time, "text": sentence})
+
+    return timestamps
+
+def save_transcript_to_json(timestamps, json_file):
+    with open(json_file, 'w') as f:
+        json.dump(timestamps, f, indent=4)
 
 # Initialize the translation pipeline
 translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-{target_language}")
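Note on the hunk above: transcribe_video sends the extracted audio to Google's free web speech API via recognize_google, so it needs network access; sent_tokenize relies on NLTK's punkt data, which has to be downloaded once; and the per-sentence start times are an even split of an estimated duration, not real word-level timings. A minimal usage sketch (the punkt download and the file names are assumptions, not part of this commit):

    import nltk
    nltk.download('punkt')  # one-time download of the sentence tokenizer used by sent_tokenize

    timestamps = transcribe_video("sample.mp4")  # hypothetical local video file
    # expected shape: [{"start": 0.0, "text": "First sentence."}, {"start": 3.7, "text": "..."}, ...]
    save_transcript_to_json(timestamps, "transcript.json")

The unchanged line at the end of the hunk still passes the literal string "Helsinki-NLP/opus-mt-en-{target_language}" (not an f-string) to pipeline(), which points at a model id that does not exist and will likely fail at import time.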
@@ -40,11 +53,52 @@ def get_translation_model(target_language):
     }
     return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr") # Default to French if not found
 
-# Example usage in your application
-def translate_text(text, target_language):
+def translate_text(timestamps_json, target_language):
+    # Load the translation model for the specified target language
     translation_model_id = get_translation_model(target_language)
     translator = pipeline("translation", model=translation_model_id)
-    return translator(text)[0]['translation_text']
+
+    # Parse the input JSON
+    timestamps = json.loads(timestamps_json)
+
+    # Prepare output structure
+    translated_timestamps = []
+
+    # Translate each sentence and store it with its start time
+    for entry in timestamps:
+        original_text = entry["text"]
+        translated_text = translator(original_text)[0]['translation_text']
+        translated_timestamps.append({
+            "start": entry["start"],
+            "original": original_text,
+            "translated": translated_text
+        })
+
+    # Return the translated timestamps as a JSON string
+    return json.dumps(translated_timestamps, indent=4)
+
+def add_transcript_to_video(video_path, timestamps, output_path):
+    # Load the video file
+    video = mp.VideoFileClip(video_path)
+
+    # Create text clips based on timestamps
+    text_clips = []
+
+    for entry in timestamps:
+        # Create a text clip for each sentence
+        txt_clip = mp.TextClip(entry["text"], fontsize=24, color='white', bg_color='black', size=video.size)
+
+        # Set the start time and duration for each text clip
+        txt_clip = txt_clip.set_start(entry["start"]).set_duration(3).set_position(('bottom')).set_opacity(0.7) # Display each sentence for 3 seconds
+
+        # Append the text clip to the list
+        text_clips.append(txt_clip)
+
+    # Overlay all text clips on the original video
+    final_video = mp.CompositeVideoClip([video] + text_clips)
+
+    # Write the result to a file
+    final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
 
 # Mock functions for platform actions and analytics
 def mock_post_to_platform(platform, content_title):
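Note on translate_text and add_transcript_to_video above: translate_text expects a JSON string (it calls json.loads) while transcribe_video returns a plain Python list, so a caller has to serialize in between; each caption is shown for a fixed 3 seconds regardless of sentence length; and MoviePy's TextClip typically needs ImageMagick installed to render text. A hedged usage sketch (the file names and the "fr" target are assumptions, not part of this commit):

    timestamps = transcribe_video("sample.mp4")
    translated_json = translate_text(json.dumps(timestamps), "fr")  # serialize the list before passing it in
    add_transcript_to_video("sample.mp4", timestamps, "output_video.mp4")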
@@ -61,19 +115,27 @@ def upload_and_manage(file, platform, language):
     if file is None:
         return "Please upload a video/audio file.", None, None, None
 
-    # Transcribe audio from uploaded media file
-    transcription = transcribe_audio_from_media_file(file.name)
+    # Define paths for audio and output files
+    audio_path = "audio.wav"
+    json_file = "transcript.json"
+    output_video_path = "output_video.mp4"
+
+    # Transcribe audio from uploaded media file and get timestamps
+    timestamps = transcribe_video(file.name)
+
+    # Save transcript to JSON
+    save_transcript_to_json(timestamps, json_file)
 
-    # Translate transcription to the selected language
-    translation = translate_text(transcription, language)
+    # Add transcript to video based on timestamps
+    add_transcript_to_video(file.name, timestamps, output_video_path)
 
-    # Mock posting action
+    # Mock posting action (you can implement this as needed)
     post_message = mock_post_to_platform(platform, file.name)
 
     # Mock analytics generation
     analytics = mock_analytics()
 
-    return post_message, transcription, translation, analytics
+    return post_message, timestamps, json_file, analytics
 
 def generate_dashboard(analytics):
     if not analytics:
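Note on upload_and_manage above: after this change the language argument is accepted but no longer used (translate_text is not called here), and the subtitled video written to output_video.mp4 is produced but never handed back to the interface; only the post message, the raw timestamps list, the JSON file path, and the mock analytics are returned. A hypothetical follow-up, not part of this commit, would be to surface the rendered file as well:

    # hypothetical variant of the final return of upload_and_manage:
    return post_message, timestamps, json_file, output_video_path, analytics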
@@ -100,12 +162,12 @@ def build_interface():
 
         with gr.Row():
             post_output = gr.Textbox(label="Posting Status", interactive=False)
-            transcription_output = gr.Textbox(label="Transcription", interactive=False)
-            translation_output = gr.Textbox(label="Translation", interactive=False)
+            transcription_output = gr.Textbox(label="Transcription Timestamps (JSON)", interactive=False)
+            json_output = gr.Textbox(label="Transcript JSON File", interactive=False)
 
         submit_button.click(upload_and_manage,
                             inputs=[file_input, platform_input, language_input],
-                            outputs=[post_output, transcription_output, translation_output, gr.State()])
+                            outputs=[post_output, transcription_output, json_output, gr.State()])
 
     with gr.Tab("Analytics Dashboard"):
         gr.Markdown("## Content Performance Analytics")
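Note on the interface hunk above: upload_and_manage returns the timestamps as a Python list and the transcript path as a plain string, so the "Transcription Timestamps (JSON)" Textbox will display the list's repr rather than formatted JSON, while the trailing gr.State() output simply absorbs the analytics dict. A hypothetical tweak, not part of this commit, to show readable JSON in that box:

    # inside upload_and_manage, serialize before returning:
    return post_message, json.dumps(timestamps, indent=2), json_file, analytics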
@@ -116,5 +178,6 @@ def build_interface():
 
     return demo
 
+# Launch the Gradio interface
 demo = build_interface()
-demo.launch()
+demo.launch()