chompionsawelo committed on
Commit
3130060
·
1 Parent(s): d15328c

transcribe test

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. diarization.py +1 -1
  3. transcribe.py +8 -4
app.py CHANGED
@@ -11,7 +11,7 @@ def prepare_input(input_file, progress=gr.Progress()):
11
  ffmpeg.input(input_file).audio.output(
12
  output_file, format="wav").run()
13
  progress(0.4, desc="Acquiring diarization")
14
- start_diarization(output_file, progress)
15
  progress(0.6, desc="Transcribing audio")
16
  return start_transcribe(progress)
17
 
 
11
  ffmpeg.input(input_file).audio.output(
12
  output_file, format="wav").run()
13
  progress(0.4, desc="Acquiring diarization")
14
+ start_diarization(output_file)
15
  progress(0.6, desc="Transcribing audio")
16
  return start_transcribe(progress)
17
 
diarization.py CHANGED
@@ -12,7 +12,7 @@ device = torch.device("cuda")
12
  pipeline.to(device)
13
 
14
 
15
- def start_diarization(input_file, progress: gr.Progress):
16
  diarization = pipeline(input_file)
17
 
18
  sample_groups = []
 
12
  pipeline.to(device)
13
 
14
 
15
+ def start_diarization(input_file):
16
  diarization = pipeline(input_file)
17
 
18
  sample_groups = []
transcribe.py CHANGED
@@ -15,11 +15,15 @@ def start_transcribe(progress):
15
  # Transcribe and save temp file
16
  audiof = f"{speaker}.wav"
17
  print(f"Loading {audiof}")
18
- result = model.transcribe(
19
  audio=audiof, language='id', word_timestamps=True)
20
- with open(f"{speaker}.json", "w") as text_file:
21
- json.dump(result, text_file, indent=4)
22
- return result['text']
 
 
 
 
23
 
24
 
25
  def load_groups_json():
 
15
  # Transcribe and save temp file
16
  audiof = f"{speaker}.wav"
17
  print(f"Loading {audiof}")
18
+ segments, _ = model.transcribe(
19
  audio=audiof, language='id', word_timestamps=True)
20
+ segments_list = list(segments)
21
+ print("SEGMENT LIST: " + str(segments_list))
22
+ text = segments_list.join(' ')
23
+ print("TEXT: " + str(text))
24
+ # with open(f"{speaker}.json", "w") as text_file:
25
+ # json.dump(text, text_file, indent=4)
26
+ # return result['text']
27
 
28
 
29
  def load_groups_json():