chompionsawelo committed on
Commit
931df81
·
1 Parent(s): bada8ef

add transcribe

Browse files
Files changed (1) hide show
  1. transcribe.py +32 -0
transcribe.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import torch
3
+ import gc
4
+ import json
5
+
6
# Run a garbage-collection pass and release cached, unused CUDA memory before
# the model is loaded.
gc.collect()
torch.cuda.empty_cache()


# Prefer the GPU, but fall back to the CPU so that importing this module does
# not fail on machines where CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the Whisper "medium" checkpoint once at import time; the functions in
# this module use this shared instance.
model = whisper.load_model("medium", device=device)
12
+
13
+
14
def start_transcribe(progress):
    """Transcribe each speaker's audio file and save every result as JSON.

    For each entry in the speaker groups returned by ``load_groups_json()``,
    the file ``<speaker>.wav`` is transcribed with the module-level Whisper
    ``model`` (language ``id``, word timestamps enabled) and the complete
    result is written to ``<speaker>.json``.

    Args:
        progress: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The ``text`` field of the last transcription, or an empty string when
        there are no speakers to transcribe.
    """
    # Only the speaker groups are needed here; the sample groups are ignored.
    _, speaker_groups = load_groups_json()

    # Default for the no-speaker case, which previously raised
    # UnboundLocalError at the return statement.
    last_text = ""
    for speaker in speaker_groups:
        # Transcribe and save temp file
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")
        result = model.transcribe(
            audio=audiof, language='id', word_timestamps=True)
        with open(f"{speaker}.json", "w", encoding="utf-8") as text_file:
            json.dump(result, text_file, indent=4)
        last_text = result['text']

    # NOTE(review): assumes the original `return` was placed after the loop
    # (indentation was lost in this view) -- confirm against the real file.
    return last_text
25
+
26
+
27
def load_groups_json():
    """Load the sample and speaker groupings from the working directory.

    Reads ``sample_groups.json`` and ``speaker_groups.json`` relative to the
    current working directory. Both files are decoded as UTF-8 so the result
    does not depend on the platform's default locale encoding.

    Returns:
        A ``(sample_groups, speaker_groups)`` tuple holding the parsed
        contents of the two files, in that order.

    Raises:
        FileNotFoundError: If either file does not exist.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    with open("sample_groups.json", "r", encoding="utf-8") as json_file_sample:
        sample_groups_list: list = json.load(json_file_sample)
    with open("speaker_groups.json", "r", encoding="utf-8") as json_file_speaker:
        speaker_groups_dict: dict = json.load(json_file_speaker)
    return sample_groups_list, speaker_groups_dict