qqwjq1981 committed
Commit 9eecb1f · verified · 1 Parent(s): 515a3f9

Update app.py

Files changed (1)
  1. app.py +30 -30
app.py CHANGED
@@ -136,34 +136,34 @@ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %
 logger = logging.getLogger(__name__)
 logger.info(f"MoviePy Version: {moviepy.__version__}")
 
-def segment_background_audio(audio_path, output_path="background_segments.wav"):
-    # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/voice-activity-detection",
-        use_auth_token=hf_api_key
-    )
-    # Step 3: Run VAD to get speech segments
-    vad_result = pipeline(audio_path)
-    print(f"Detected speech segments: {vad_result}")
-
-    # Step 4: Load full audio and subtract speech segments
-    full_audio = AudioSegment.from_wav(audio_path)
-    background_audio = AudioSegment.silent(duration=len(full_audio))
-
-    for segment in vad_result.itersegments():
-        start_ms = int(segment.start * 1000)
-        end_ms = int(segment.end * 1000)
-        # Remove speech by muting that portion
-        background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
-
-    # Step 5: Subtract background_audio from full_audio
-    result_audio = full_audio.overlay(background_audio)
-
-    # Step 6: Export non-speech segments
-    result_audio.export(output_path, format="wav")
-    print(f"Saved non-speech (background) audio to: {output_path}")
-
-    return True
+# def segment_background_audio(audio_path, output_path="background_segments.wav"):
+#     # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/voice-activity-detection",
+#         use_auth_token=hf_api_key
+#     )
+#     # Step 3: Run VAD to get speech segments
+#     vad_result = pipeline(audio_path)
+#     print(f"Detected speech segments: {vad_result}")
+
+#     # Step 4: Load full audio and subtract speech segments
+#     full_audio = AudioSegment.from_wav(audio_path)
+#     background_audio = AudioSegment.silent(duration=len(full_audio))
+
+#     for segment in vad_result.itersegments():
+#         start_ms = int(segment.start * 1000)
+#         end_ms = int(segment.end * 1000)
+#         # Remove speech by muting that portion
+#         background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
+
+#     # Step 5: Subtract background_audio from full_audio
+#     result_audio = full_audio.overlay(background_audio)
+
+#     # Step 6: Export non-speech segments
+#     result_audio.export(output_path, format="wav")
+#     print(f"Saved non-speech (background) audio to: {output_path}")
+
+#     return True
 
 def transcribe_video_with_speakers(video_path):
     # Extract audio from video
@@ -172,8 +172,8 @@ def transcribe_video_with_speakers(video_path):
     video.audio.write_audiofile(audio_path)
     logger.info(f"Audio extracted from video: {audio_path}")
 
-    segment_result = segment_background_audio(audio_path)
-    print(f"Saved non-speech (background) audio to local")
+    # segment_result = segment_background_audio(audio_path)
+    # print(f"Saved non-speech (background) audio to local")
 
     # Set up device
     device = "cuda" if torch.cuda.is_available() else "cpu"
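
Note for anyone restoring this step: pydub's overlay() mixes two segments rather than replacing one with the other, so overlaying silence is a no-op; as written, the disabled function exported the input audio essentially unchanged, which may be why the commit comments it out instead of keeping it. Below is a minimal sketch of the intended behavior (mute detected speech spans, keep the background), assuming pyannote.audio and pydub as already imported in app.py. The helper name extract_background_audio and the explicit hf_token parameter are illustrative; the original read a module-level hf_api_key.

```python
from pyannote.audio import Pipeline
from pydub import AudioSegment

def extract_background_audio(audio_path, hf_token, output_path="background_segments.wav"):
    # Run pyannote voice activity detection (requires a Hugging Face token).
    pipeline = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection",
        use_auth_token=hf_token,
    )
    vad_result = pipeline(audio_path)

    # Splice silence over each detected speech span; slicing replaces samples,
    # unlike overlay(), which only mixes them in.
    background = AudioSegment.from_wav(audio_path)
    for segment in vad_result.itersegments():
        start_ms = max(0, int(segment.start * 1000))
        end_ms = min(int(segment.end * 1000), len(background))
        silence = AudioSegment.silent(duration=end_ms - start_ms,
                                      frame_rate=background.frame_rate)
        background = background[:start_ms] + silence + background[end_ms:]

    background.export(output_path, format="wav")
    return output_path
```

If the step were re-enabled, the commented call site would then become something like background_path = extract_background_audio(audio_path, hf_api_key).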