Spaces:
Runtime error
Runtime error
Commit
·
32e4ded
1
Parent(s):
3130060
last adjustment
Browse files- app.py +4 -1
- list.py +0 -0
- transcribe.py +33 -6
app.py
CHANGED
@@ -16,10 +16,13 @@ def prepare_input(input_file, progress=gr.Progress()):
|
|
16 |
return start_transcribe(progress)
|
17 |
|
18 |
|
|
|
|
|
|
|
19 |
video_interface = gr.Interface(
|
20 |
fn=prepare_input,
|
21 |
inputs=gr.Video(type="file"),
|
22 |
-
outputs=
|
23 |
title="Test 2"
|
24 |
)
|
25 |
|
|
|
16 |
return start_transcribe(progress)
|
17 |
|
18 |
|
19 |
# Gradio output slots: one downloadable file for the full transcript and one
# for the generated subtitle file (the two paths returned by the handler).
output_files = (
    gr.outputs.File(label="Transcribe"),
    gr.outputs.File(label="Subtitle"),
)

# Video-upload front end; the uploaded file is handed to `prepare_input`.
video_interface = gr.Interface(
    fn=prepare_input,
    inputs=gr.Video(type="file"),
    outputs=output_files,
    title="Test 2",
)
|
28 |
|
list.py
ADDED
File without changes
|
transcribe.py
CHANGED
@@ -11,6 +11,7 @@ model = WhisperModel("medium", device="cuda", compute_type="int8_float16")
|
|
11 |
|
12 |
def start_transcribe(progress):
|
13 |
_, speaker_groups = load_groups_json()
|
|
|
14 |
for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc="Processing diarization")):
|
15 |
# Transcribe and save temp file
|
16 |
audiof = f"{speaker}.wav"
|
@@ -18,12 +19,38 @@ def start_transcribe(progress):
|
|
18 |
segments, _ = model.transcribe(
|
19 |
audio=audiof, language='id', word_timestamps=True)
|
20 |
segments_list = list(segments)
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
def load_groups_json():
|
|
|
11 |
|
12 |
def start_transcribe(progress):
    """Transcribe each diarized speaker chunk and write the output files.

    For every speaker in the diarization groups, transcribes the matching
    ``{speaker}.wav`` file, appends numbered cues to ``subtitle.srt``, dumps
    the raw segments to ``{speaker}.json``, and appends the speaker's joined
    text to ``transcribe.txt``.

    Args:
        progress: gradio ``Progress`` object; only its ``tqdm`` wrapper is used.

    Returns:
        list[str]: the generated file names, ``["subtitle.srt", "transcribe.txt"]``.
    """
    _, speaker_groups = load_groups_json()

    cue_number = 0  # running SRT cue index, continuous across all speakers
    for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc="Processing diarization")):
        # Transcribe and save temp file
        audiof = f"{speaker}.wav"
        segments, _ = model.transcribe(
            audio=audiof, language='id', word_timestamps=True)
        segments_list = list(segments)

        # Hoisted out of the segment loop: it is loop-invariant, and the
        # original raised NameError on the final write when a speaker had
        # no segments (``name`` was only bound inside the loop).
        name = str(speaker)[:10]

        text_list_to_print = []
        for segment in segments_list:
            start = timeStr(segment['start'])
            end = timeStr(segment['end'])
            text = segment["text"]
            # BUG FIX: the original built the cue header from
            # ``len(subtitle_txt) + 1`` — reading ``subtitle_txt`` before it
            # was ever assigned (NameError on the first segment), and a
            # string length is not a cue counter anyway. Use an explicit
            # running counter instead.
            cue_number += 1
            subtitle_txt = f"{cue_number}\n{start} --> {end}\n[{name}] {text}\n\n"
            # Appending subtitle txt for each segment
            with open("subtitle.srt", "a") as file:
                file.write(subtitle_txt)
            # Appending text for each segment to print
            text_list_to_print.append(text)

        # Print full text for each speaker turn
        text = "\n".join(text_list_to_print)
        print(text)

        # Create transcribe per speaker
        with open(f"{speaker}.json", "w") as text_file:
            json.dump(segments_list, text_file, indent=4)
        # Append to complete transcribe file
        with open("transcribe.txt", "a") as file:
            file.write(f"[{name}] {text}\n")

    return ["subtitle.srt", "transcribe.txt"]
|
48 |
+
|
49 |
+
|
50 |
+
def timeStr(t):
    """Format a time offset in seconds as an SRT timestamp.

    BUG FIX: the SubRip format requires ``HH:MM:SS,mmm`` — a comma decimal
    separator and three millisecond digits. The original emitted
    ``HH:MM:SS.ss`` (dot, centiseconds), which strict players reject.

    Args:
        t: non-negative offset in seconds (float or int).

    Returns:
        str: e.g. ``timeStr(3661.5) == '01:01:01,500'``.
    """
    # Work in integer milliseconds so rounding can never produce a
    # ``60.00``-second field the way per-component rounding could.
    total_ms = round(t * 1000)
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    seconds, millis = divmod(rem, 1000)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}'
|
54 |
|
55 |
|
56 |
def load_groups_json():
|