Spaces:
Runtime error
Runtime error
Commit
·
cb85517
1
Parent(s):
e2d8d82
test 2
Browse files
- app.py +6 -6
- diarization.py +16 -21
- transcribe.py +2 -2
app.py
CHANGED
@@ -3,24 +3,24 @@ from diarization import start_diarization
|
|
3 |
from transcribe import start_transcribe
|
4 |
import ffmpeg
|
5 |
import gradio as gr
|
6 |
-
import os
|
7 |
|
8 |
|
9 |
-
def prepare_input(input_file):
|
10 |
output_file = "input.wav"
|
|
|
11 |
ffmpeg.input(input_file).audio.output(
|
12 |
output_file, format="wav").run()
|
13 |
-
|
14 |
-
progress = gr.Progress()
|
15 |
start_diarization(output_file, progress)
|
16 |
-
|
|
|
17 |
|
18 |
|
19 |
video_interface = gr.Interface(
|
20 |
fn=prepare_input,
|
21 |
inputs=gr.Video(type="file"),
|
22 |
outputs="text",
|
23 |
-
title="Test
|
24 |
)
|
25 |
|
26 |
if __name__ == "__main__":
|
|
|
3 |
from transcribe import start_transcribe
|
4 |
import ffmpeg
|
5 |
import gradio as gr
|
|
|
6 |
|
7 |
|
8 |
+
def prepare_input(input_file, progress=gr.Progress()):
|
9 |
output_file = "input.wav"
|
10 |
+
progress(0.2, desc="Preparing video")
|
11 |
ffmpeg.input(input_file).audio.output(
|
12 |
output_file, format="wav").run()
|
13 |
+
progress(0.4, desc="Acquiring diarization")
|
|
|
14 |
start_diarization(output_file, progress)
|
15 |
+
progress(0.6, desc="Transcribing audio")
|
16 |
+
return start_transcribe(progress)
|
17 |
|
18 |
|
19 |
video_interface = gr.Interface(
|
20 |
fn=prepare_input,
|
21 |
inputs=gr.Video(type="file"),
|
22 |
outputs="text",
|
23 |
+
title="Test 2"
|
24 |
)
|
25 |
|
26 |
if __name__ == "__main__":
|
diarization.py
CHANGED
@@ -5,8 +5,7 @@ import os
|
|
5 |
import torch
|
6 |
import json
|
7 |
|
8 |
-
|
9 |
-
hugging_face_token = "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"
|
10 |
pipeline = Pipeline.from_pretrained(
|
11 |
'pyannote/speaker-diarization', use_auth_token=hugging_face_token)
|
12 |
device = torch.device("cuda")
|
@@ -14,29 +13,25 @@ pipeline.to(device)
|
|
14 |
|
15 |
|
16 |
def start_diarization(input_file, progress: gr.Progress):
|
17 |
-
print("Starting diarization")
|
18 |
-
progress(0, desc="Starting diarization")
|
19 |
diarization = pipeline(input_file)
|
20 |
|
21 |
sample_groups = []
|
22 |
speaker_groups = {}
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
# print(f"speaker_groups {file_name}: {speaker_groups[file_name]}")
|
39 |
-
# print(f"start={turn.start:.3f}s stop={turn.end:.3f}s speaker_{speaker}")
|
40 |
|
41 |
save_groups_json(sample_groups, speaker_groups)
|
42 |
audio_segmentation(input_file, speaker_groups)
|
|
|
5 |
import torch
|
6 |
import json
|
7 |
|
8 |
+
hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
|
|
|
9 |
pipeline = Pipeline.from_pretrained(
|
10 |
'pyannote/speaker-diarization', use_auth_token=hugging_face_token)
|
11 |
device = torch.device("cuda")
|
|
|
13 |
|
14 |
|
15 |
def start_diarization(input_file, progress: gr.Progress):
|
|
|
|
|
16 |
diarization = pipeline(input_file)
|
17 |
|
18 |
sample_groups = []
|
19 |
speaker_groups = {}
|
20 |
+
iterables = diarization.itertracks(yield_label=True)
|
21 |
+
for turn, _, speaker, _ in zip(iterables, progress.tqdm(iterables, desc="Processing diarization")):
|
22 |
+
if (speaker not in sample_groups):
|
23 |
+
sample_groups.append(str(speaker))
|
24 |
+
|
25 |
+
suffix = 1
|
26 |
+
file_name = f"{speaker}-{suffix}"
|
27 |
+
while file_name in speaker_groups:
|
28 |
+
suffix += 1
|
29 |
+
file_name = f"{speaker}-{suffix}"
|
30 |
+
speaker_groups[file_name] = [turn.start, turn.end]
|
31 |
+
|
32 |
+
print(f"speaker_groups {file_name}: {speaker_groups[file_name]}")
|
33 |
+
print(
|
34 |
+
f"start={turn.start:.3f}s stop={turn.end:.3f}s speaker_{speaker}")
|
|
|
|
|
35 |
|
36 |
save_groups_json(sample_groups, speaker_groups)
|
37 |
audio_segmentation(input_file, speaker_groups)
|
transcribe.py
CHANGED
@@ -10,8 +10,8 @@ model = WhisperModel("medium", device="cuda", compute_type="int8_float16")
|
|
10 |
|
11 |
|
12 |
def start_transcribe(progress):
|
13 |
-
|
14 |
-
for speaker in speaker_groups:
|
15 |
# Transcribe and save temp file
|
16 |
audiof = f"{speaker}.wav"
|
17 |
print(f"Loading {audiof}")
|
|
|
10 |
|
11 |
|
12 |
def start_transcribe(progress):
|
13 |
+
_, speaker_groups = load_groups_json()
|
14 |
+
for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc="Processing diarization")):
|
15 |
# Transcribe and save temp file
|
16 |
audiof = f"{speaker}.wav"
|
17 |
print(f"Loading {audiof}")
|