chompionsawelo committed
Commit 32e4ded · 1 Parent(s): 3130060

last adjustment

Files changed (3)
  1. app.py +4 -1
  2. list.py +0 -0
  3. transcribe.py +33 -6
app.py CHANGED
@@ -16,10 +16,13 @@ def prepare_input(input_file, progress=gr.Progress()):
     return start_transcribe(progress)
 
 
+output_files = gr.outputs.File(
+    label="Transcribe"), gr.outputs.File(label="Subtitle")
+
 video_interface = gr.Interface(
     fn=prepare_input,
     inputs=gr.Video(type="file"),
-    outputs="text",
+    outputs=output_files,
     title="Test 2"
 )
 
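For orientation, here is a minimal sketch of how the two file outputs could be wired end to end. It assumes a Gradio release where `gr.File` is the non-deprecated spelling of `gr.outputs.File`, and it assumes `start_transcribe` returns the two generated file paths (as the `transcribe.py` change below suggests). The import, the `prepare_input` body, and the `launch()` call are placeholders, not code from this repo.

```python
import gradio as gr

from transcribe import start_transcribe  # assumed import; the real app.py may differ


def prepare_input(input_file, progress=gr.Progress()):
    # Placeholder body: the real function prepares the audio/diarization first.
    # start_transcribe(progress) is expected to return
    # ["subtitle.srt", "transcribe.txt"] (see transcribe.py below).
    return start_transcribe(progress)


# Two downloadable outputs, matching the labels introduced in this commit
output_files = [gr.File(label="Transcribe"), gr.File(label="Subtitle")]

video_interface = gr.Interface(
    fn=prepare_input,
    inputs=gr.Video(),  # type="file" is version-dependent; omitted here
    outputs=output_files,
    title="Test 2",
)

if __name__ == "__main__":
    video_interface.launch()
```

Note that Gradio maps a returned list to the output components positionally, so with `return ["subtitle.srt", "transcribe.txt"]` the component labelled "Transcribe" would receive `subtitle.srt`; the label order may be worth double-checking.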
 
list.py ADDED
File without changes
transcribe.py CHANGED
@@ -11,6 +11,7 @@ model = WhisperModel("medium", device="cuda", compute_type="int8_float16")
 
 def start_transcribe(progress):
     _, speaker_groups = load_groups_json()
+
     for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc="Processing diarization")):
         # Transcribe and save temp file
         audiof = f"{speaker}.wav"
@@ -18,12 +19,38 @@ def start_transcribe(progress):
         segments, _ = model.transcribe(
             audio=audiof, language='id', word_timestamps=True)
         segments_list = list(segments)
-        print("SEGMENT LIST: " + str(segments_list))
-        text = segments_list.join(' ')
-        print("TEXT: " + str(text))
-        # with open(f"{speaker}.json", "w") as text_file:
-        #     json.dump(text, text_file, indent=4)
-        # return result['text']
+
+        text_list_to_print = []
+        for segment in segments_list:
+            start = timeStr(segment['start'])
+            end = timeStr(segment['end'])
+            name = str(speaker)[:10]
+            text = segment["text"]
+            subtitle_txt = f"{len(subtitle_txt) + 1}\n{start} --> {end}\n[{name}] {text}\n\n"
+            # Appending subtitle txt for each segment
+            with open("subtitle.srt", "a") as file:
+                file.writelines(subtitle_txt)
+            # Appending text for each segment to print
+            text_list_to_print.append(text)
+
+        # Print full text for each speaker turn
+        text = "\n".join(text_list_to_print)
+        print(text)
+
+        # Create transcribe per speaker
+        with open(f"{speaker}.json", "w") as text_file:
+            json.dump(segments_list, text_file, indent=4)
+        # Append to complete transcribe file
+        with open("transcribe.txt", "a") as file:
+            file.write(f"[{name}] {text}\n")
+
+    return ["subtitle.srt", "transcribe.txt"]
+
+
+def timeStr(t):
+    return '{0:02d}:{1:02d}:{2:06.2f}'.format(round(t // 3600),
+                                              round(t % 3600 // 60),
+                                              t % 60)
 
 
 def load_groups_json():
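Two details in the added block look worth flagging. `subtitle_txt` is read inside its own first assignment (`len(subtitle_txt) + 1`), which would raise a `NameError` on the first segment, and the length of the accumulated string would not be a running cue number in any case. Also, if `model` is faster-whisper's `WhisperModel` (as the hunk header suggests), each `segment` is a `Segment` named tuple, so fields are read as `segment.start` rather than `segment['start']`, and dumping the raw list to JSON yields arrays rather than keyed objects. The sketch below is one way the apparent intent could be expressed with an explicit cue counter; `write_speaker_segments`, `time_str`, and `cue_start` are hypothetical names, not identifiers from this repo. (Strict SRT timestamps use `HH:MM:SS,mmm` with a comma; the sketch keeps the commit's `timeStr` formatting.)

```python
import json


def time_str(t):
    # Same formatting as the commit's timeStr: HH:MM:SS.ss
    return '{0:02d}:{1:02d}:{2:06.2f}'.format(round(t // 3600),
                                              round(t % 3600 // 60),
                                              t % 60)


def write_speaker_segments(speaker, segments_list, cue_start=1):
    """Hypothetical helper: write SRT cues and per-speaker JSON for one speaker turn.

    Assumes each item in segments_list exposes .start, .end, and .text
    (as faster-whisper Segment named tuples do).
    """
    name = str(speaker)[:10]
    texts = []
    cue = cue_start

    # Append one SRT cue per segment, numbering them explicitly
    with open("subtitle.srt", "a") as srt:
        for segment in segments_list:
            start, end = time_str(segment.start), time_str(segment.end)
            srt.write(f"{cue}\n{start} --> {end}\n[{name}] {segment.text}\n\n")
            texts.append(segment.text)
            cue += 1

    # Per-speaker JSON: store plain fields, since Segment tuples carry no keys
    with open(f"{speaker}.json", "w") as f:
        json.dump([{"start": s.start, "end": s.end, "text": s.text}
                   for s in segments_list], f, indent=4)

    # Append the full speaker turn to the combined transcript
    full_text = "\n".join(texts)
    with open("transcribe.txt", "a") as f:
        f.write(f"[{name}] {full_text}\n")

    return cue  # next free cue number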