Update app.py
Browse files
app.py
CHANGED
@@ -136,34 +136,34 @@ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %
|
|
136 |
logger = logging.getLogger(__name__)
|
137 |
logger.info(f"MoviePy Version: {moviepy.__version__}")
|
138 |
|
139 |
-
def segment_background_audio(audio_path, output_path="background_segments.wav"):
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
|
168 |
def transcribe_video_with_speakers(video_path):
|
169 |
# Extract audio from video
|
@@ -172,8 +172,8 @@ def transcribe_video_with_speakers(video_path):
|
|
172 |
video.audio.write_audiofile(audio_path)
|
173 |
logger.info(f"Audio extracted from video: {audio_path}")
|
174 |
|
175 |
-
segment_result = segment_background_audio(audio_path)
|
176 |
-
print(f"Saved non-speech (background) audio to local")
|
177 |
|
178 |
# Set up device
|
179 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
136 |
logger = logging.getLogger(__name__)
|
137 |
logger.info(f"MoviePy Version: {moviepy.__version__}")
|
138 |
|
139 |
+
# def segment_background_audio(audio_path, output_path="background_segments.wav"):
|
140 |
+
# # Step 2: Initialize the pyannote voice-activity-detection pipeline (requires a Hugging Face access token)
|
141 |
+
# pipeline = Pipeline.from_pretrained(
|
142 |
+
# "pyannote/voice-activity-detection",
|
143 |
+
# use_auth_token=hf_api_key
|
144 |
+
# )
|
145 |
+
# # Step 3: Run VAD to get speech segments
|
146 |
+
# vad_result = pipeline(audio_path)
|
147 |
+
# print(f"Detected speech segments: {vad_result}")
|
148 |
+
|
149 |
+
# # Step 4: Load full audio and subtract speech segments
|
150 |
+
# full_audio = AudioSegment.from_wav(audio_path)
|
151 |
+
# background_audio = AudioSegment.silent(duration=len(full_audio))
|
152 |
+
|
153 |
+
# for segment in vad_result.itersegments():
|
154 |
+
# start_ms = int(segment.start * 1000)
|
155 |
+
# end_ms = int(segment.end * 1000)
|
156 |
+
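# # Intended to mute the speech portion. NOTE(review): background_audio is already silent, so overlaying silence here changes nothing — confirm intent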
|
157 |
+
# background_audio = background_audio.overlay(AudioSegment.silent(duration=end_ms - start_ms), position=start_ms)
|
158 |
+
|
159 |
+
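# # Step 5: Overlay background_audio onto full_audio. NOTE(review): overlay mixes audio rather than subtracting it, so speech is likely not removed — confirm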
|
160 |
+
# result_audio = full_audio.overlay(background_audio)
|
161 |
+
|
162 |
+
# # Step 6: Export non-speech segments
|
163 |
+
# result_audio.export(output_path, format="wav")
|
164 |
+
# print(f"Saved non-speech (background) audio to: {output_path}")
|
165 |
+
|
166 |
+
# return True
|
167 |
|
168 |
def transcribe_video_with_speakers(video_path):
|
169 |
# Extract audio from video
|
|
|
172 |
video.audio.write_audiofile(audio_path)
|
173 |
logger.info(f"Audio extracted from video: {audio_path}")
|
174 |
|
175 |
+
# segment_result = segment_background_audio(audio_path)
|
176 |
+
# print(f"Saved non-speech (background) audio to local")
|
177 |
|
178 |
# Set up device
|
179 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|