qqwjq1981 committed
Commit 28c6cdd · verified · 1 Parent(s): 2081360

Update app.py

Files changed (1):
  1. app.py +48 -28
app.py CHANGED
@@ -126,34 +126,59 @@ def handle_feedback(feedback):
     conn.commit()
     return "Thank you for your feedback!", None
 
-def segment_background_audio(audio_path, output_path="background_segments.wav"):
-    # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
-    return True
+def segment_background_audio(audio_path, output_path="background_segments.wav", hf_token=None):
+    return 10
 
-    # pipeline = Pipeline.from_pretrained(
-    #     "pyannote/voice-activity-detection",
-    #     use_auth_token=hf_api_key
-    # )
-    # # Step 3: Run VAD to get speech segments
+    # """
+    # Detects and extracts non-speech (background) segments from audio using pyannote VAD.
+
+    # Parameters:
+    # - audio_path (str): Path to input audio (.wav).
+    # - output_path (str): Path to save the output non-speech audio.
+    # - hf_token (str): Hugging Face auth token for pyannote.
+
+    # Returns:
+    # - List of non-speech timestamp tuples (start, end) in seconds.
+    # """
+    # if not hf_token:
+    #     raise ValueError("Hugging Face token is required for pyannote pipeline.")
+
+    # # Step 1: Load pipeline
+    # pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection", use_auth_token=hf_token)
+
+    # # Step 2: Apply VAD to get speech segments
     # vad_result = pipeline(audio_path)
-    # print(f"Detected speech segments: {vad_result}")
+    # print(" Speech segments detected.")
 
-    # # Step 4: Load full audio and subtract speech segments
+    # # Step 3: Get full duration of the audio
     # full_audio = AudioSegment.from_wav(audio_path)
-    # background_audio = AudioSegment.silent(duration=len(full_audio))
+    # full_duration_sec = len(full_audio) / 1000.0
+
+    # # Step 4: Compute non-speech segments
+    # background_segments = []
+    # current_time = 0.0
 
     # for segment in vad_result.itersegments():
-    #     start_ms = int(segment.start * 1000)
-    #     end_ms = int(segment.end * 1000)
-    #     # Remove speech by muting that portion
-    #     background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
+    #     if current_time < segment.start:
+    #         background_segments.append((current_time, segment.start))
+    #     current_time = segment.end
 
-    # # Step 5: Subtract background_audio from full_audio
-    # result_audio = full_audio.overlay(background_audio)
+    # if current_time < full_duration_sec:
+    #     background_segments.append((current_time, full_duration_sec))
 
-    # # Step 6: Export non-speech segments
-    # result_audio.export(output_path, format="wav")
-    # print(f"Saved non-speech (background) audio to: {output_path}")
+    # print(f"🕒 Non-speech segments: {background_segments}")
+
+    # # Step 5: Extract and combine non-speech segments
+    # non_speech_audio = AudioSegment.empty()
+    # for start, end in background_segments:
+    #     segment = full_audio[int(start * 1000):int(end * 1000)]
+    #     non_speech_audio += segment
+
+    # # Step 6: Export the non-speech audio
+    # non_speech_audio.export(output_path, format="wav")
+    # print(f"🎵 Non-speech audio saved to: {output_path}")
+
+    # return background_segments
 
 
 def transcribe_video_with_speakers(video_path):
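
Note that the commit still ships segment_background_audio as a stub (it returns 10), with the real logic left commented out. A runnable sketch of that commented-out logic might look like the following, assuming pyannote.audio and pydub are installed and the pyannote/voice-activity-detection checkpoint is accessible with the caller's token; the calls mirror the commented lines above, and only the de-commenting and the max() guard against overlapping segments are assumptions here.

from pydub import AudioSegment
from pyannote.audio import Pipeline

def segment_background_audio(audio_path, output_path="background_segments.wav", hf_token=None):
    """Extract non-speech (background) audio; returns (start, end) tuples in seconds."""
    if not hf_token:
        raise ValueError("Hugging Face token is required for pyannote pipeline.")

    # Steps 1-2: load the VAD pipeline and detect speech segments.
    pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection",
                                        use_auth_token=hf_token)
    vad_result = pipeline(audio_path)

    # Step 3: full duration of the input audio, in seconds.
    full_audio = AudioSegment.from_wav(audio_path)
    full_duration_sec = len(full_audio) / 1000.0

    # Step 4: non-speech segments are the gaps between speech segments.
    background_segments = []
    current_time = 0.0
    for segment in vad_result.itersegments():
        if current_time < segment.start:
            background_segments.append((current_time, segment.start))
        current_time = max(current_time, segment.end)  # overlap guard (assumption)
    if current_time < full_duration_sec:
        background_segments.append((current_time, full_duration_sec))

    # Steps 5-6: concatenate the gaps (pydub slices in milliseconds) and export.
    non_speech_audio = AudioSegment.empty()
    for start, end in background_segments:
        non_speech_audio += full_audio[int(start * 1000):int(end * 1000)]
    non_speech_audio.export(output_path, format="wav")
    return background_segments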
@@ -475,14 +500,9 @@ def add_transcript_voiceover(video_path, translated_json, output_path, add_voice
 
     final_video = CompositeVideoClip([video] + text_clips)
 
-    if add_voiceover:
-        if audio_segments:
-            final_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
-            final_video = final_video.set_audio(final_audio)
-        else:
-            logger.warning("⚠️ No audio segments available. Adding silent fallback.")
-            silent_audio = AudioClip(lambda t: 0, duration=video.duration)
-            final_video = final_video.set_audio(silent_audio)
+    if add_voiceover and audio_segments:
+        final_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
+        final_video = final_video.set_audio(final_audio)
 
     logger.info(f"Saving the final video to: {output_path}")
     final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
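
Here the voiceover logic collapses to a single branch: the silent-audio fallback (AudioClip(lambda t: 0, ...)) is removed, so when no audio segments are generated the composite simply keeps whatever audio the source video already has. A minimal self-contained sketch of the new behavior, assuming moviepy 1.x (which provides the set_duration/set_audio calls used in the diff); the file names and clip lists are illustrative only.

from moviepy.editor import (AudioFileClip, CompositeAudioClip,
                            CompositeVideoClip, VideoFileClip)

# Illustrative stand-ins for the function's real arguments.
video = VideoFileClip("input.mp4")
text_clips = []                                    # subtitle clips omitted here
audio_segments = [AudioFileClip("voiceover.wav")]  # e.g. per-segment TTS output
add_voiceover = True

final_video = CompositeVideoClip([video] + text_clips)
if add_voiceover and audio_segments:
    # Mix all voiceover segments and clamp the mix to the video's length.
    final_audio = CompositeAudioClip(audio_segments).set_duration(video.duration)
    final_video = final_video.set_audio(final_audio)

# With no segments, the clip keeps its original audio track (no silent fallback).
final_video.write_videofile("output.mp4", codec="libx264", audio_codec="aac")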