qqwjq1981 committed
Commit 9eecb1f · verified · 1 Parent(s): 515a3f9

Update app.py

Files changed (1)
  1. app.py +30 -30
app.py CHANGED
@@ -136,34 +136,34 @@ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %
 logger = logging.getLogger(__name__)
 logger.info(f"MoviePy Version: {moviepy.__version__}")
 
-def segment_background_audio(audio_path, output_path="background_segments.wav"):
-    # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
-    pipeline = Pipeline.from_pretrained(
-        "pyannote/voice-activity-detection",
-        use_auth_token=hf_api_key
-    )
-    # Step 3: Run VAD to get speech segments
-    vad_result = pipeline(audio_path)
-    print(f"Detected speech segments: {vad_result}")
-
-    # Step 4: Load full audio and subtract speech segments
-    full_audio = AudioSegment.from_wav(audio_path)
-    background_audio = AudioSegment.silent(duration=len(full_audio))
-
-    for segment in vad_result.itersegments():
-        start_ms = int(segment.start * 1000)
-        end_ms = int(segment.end * 1000)
-        # Remove speech by muting that portion
-        background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
-
-    # Step 5: Subtract background_audio from full_audio
-    result_audio = full_audio.overlay(background_audio)
-
-    # Step 6: Export non-speech segments
-    result_audio.export(output_path, format="wav")
-    print(f"Saved non-speech (background) audio to: {output_path}")
-
-    return True
+# def segment_background_audio(audio_path, output_path="background_segments.wav"):
+#     # Step 2: Initialize pyannote voice activity detection pipeline (you need Hugging Face token)
+#     pipeline = Pipeline.from_pretrained(
+#         "pyannote/voice-activity-detection",
+#         use_auth_token=hf_api_key
+#     )
+#     # Step 3: Run VAD to get speech segments
+#     vad_result = pipeline(audio_path)
+#     print(f"Detected speech segments: {vad_result}")
+
+#     # Step 4: Load full audio and subtract speech segments
+#     full_audio = AudioSegment.from_wav(audio_path)
+#     background_audio = AudioSegment.silent(duration=len(full_audio))
+
+#     for segment in vad_result.itersegments():
+#         start_ms = int(segment.start * 1000)
+#         end_ms = int(segment.end * 1000)
+#         # Remove speech by muting that portion
+#         background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
+
+#     # Step 5: Subtract background_audio from full_audio
+#     result_audio = full_audio.overlay(background_audio)
+
+#     # Step 6: Export non-speech segments
+#     result_audio.export(output_path, format="wav")
+#     print(f"Saved non-speech (background) audio to: {output_path}")
+
+#     return True
 
 def transcribe_video_with_speakers(video_path):
     # Extract audio from video
@@ -172,8 +172,8 @@ def transcribe_video_with_speakers(video_path):
     video.audio.write_audiofile(audio_path)
     logger.info(f"Audio extracted from video: {audio_path}")
 
-    segment_result = segment_background_audio(audio_path)
-    print(f"Saved non-speech (background) audio to local")
+    # segment_result = segment_background_audio(audio_path)
+    # print(f"Saved non-speech (background) audio to local")
 
     # Set up device
     device = "cuda" if torch.cuda.is_available() else "cpu"
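
Note for anyone restoring this step: pydub's overlay() mixes two segments rather than replacing one with the other, so overlaying silence is a no-op; as written, the disabled function exported the input audio essentially unchanged, which may be why the commit comments it out instead of keeping it. Below is a minimal sketch of the intended behavior (mute detected speech spans, keep the background), assuming pyannote.audio and pydub as already imported in app.py. The helper name extract_background_audio and the explicit hf_token parameter are illustrative; the original read a module-level hf_api_key.

```python
from pyannote.audio import Pipeline
from pydub import AudioSegment

def extract_background_audio(audio_path, hf_token, output_path="background_segments.wav"):
    # Run pyannote voice activity detection (requires a Hugging Face token).
    pipeline = Pipeline.from_pretrained(
        "pyannote/voice-activity-detection",
        use_auth_token=hf_token,
    )
    vad_result = pipeline(audio_path)

    # Splice silence over each detected speech span; slicing replaces samples,
    # unlike overlay(), which only mixes them in.
    background = AudioSegment.from_wav(audio_path)
    for segment in vad_result.itersegments():
        start_ms = max(0, int(segment.start * 1000))
        end_ms = min(int(segment.end * 1000), len(background))
        silence = AudioSegment.silent(duration=end_ms - start_ms,
                                      frame_rate=background.frame_rate)
        background = background[:start_ms] + silence + background[end_ms:]

    background.export(output_path, format="wav")
    return output_path
```

If the step were re-enabled, the commented call site would then become something like background_path = extract_background_audio(audio_path, hf_api_key).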