chompionsawelo committed · Commit ed6e5d8 · 1 Parent(s): 18e34db

small trial

Files changed (2):
  1. app.py +8 -4
  2. diarization.py +27 -19
app.py CHANGED
@@ -1,19 +1,23 @@
 from huggingface_hub import login
-from diarization import startDiarization
+from diarization import start_diarization
+from transcribe import start_transcribe
 import ffmpeg
 import gradio as gr
 import os


-def prepareInput(input_file):
+def prepare_input(input_file):
     output_file = "input.wav"
     ffmpeg.input(input_file).audio.output(
         output_file, format="wav").run()
-    return startDiarization(output_file)
+
+    progress = gr.Progress()
+    start_diarization(output_file, progress)
+    return start_transcribe(progress)


 video_interface = gr.Interface(
-    fn=prepareInput,
+    fn=prepare_input,
     inputs=gr.Video(type="file"),
     outputs="text",
     title="Get Diarization"
diarization.py CHANGED
@@ -1,42 +1,50 @@
 from pyannote.audio import Pipeline
 from pydub import AudioSegment
+import gradio as gr
 import os
 import torch
 import json

-hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
+# hugging_face_token = os.environ["HUGGING_FACE_TOKEN"]
+hugging_face_token = "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"
 pipeline = Pipeline.from_pretrained(
     'pyannote/speaker-diarization', use_auth_token=hugging_face_token)
 device = torch.device("cuda")
 pipeline.to(device)


-def startDiarization(input_file):
+def start_diarization(input_file, progress: gr.Progress):
     print("Starting diarization")
+    progress(0, desc="Starting diarization")
     diarization = pipeline(input_file)

     sample_groups = []
     speaker_groups = {}
-    for turn, _, speaker in diarization.itertracks(yield_label=True):
-        if (speaker not in sample_groups):
-            sample_groups.append(str(speaker))
-
-        suffix = 1
-        file_name = f"{speaker}-{suffix}"
-        while file_name in speaker_groups:
-            suffix += 1
-            file_name = f"{speaker}-{suffix}"
-        speaker_groups[file_name] = [turn.start, turn.end]
-        print(f"speaker_groups {file_name}: {speaker_groups[file_name]}")
-        print(f"start={turn.start:.3f}s stop={turn.end:.3f}s speaker_{speaker}")
-
-    saveGroupsJson(sample_groups, speaker_groups)
-    audioSegmentation(input_file, speaker_groups)
+    print(str(diarization))
+    # for turn, _, speaker in diarization.itertracks(yield_label=True):
+    #     print(diarization)
+    #     for step in progress.tqdm(diarization.)
+
+    #     if (speaker not in sample_groups):
+    #         sample_groups.append(str(speaker))
+
+    #     suffix = 1
+    #     file_name = f"{speaker}-{suffix}"
+    #     while file_name in speaker_groups:
+    #         suffix += 1
+    #         file_name = f"{speaker}-{suffix}"
+    #     speaker_groups[file_name] = [turn.start, turn.end]
+
+    #     print(f"speaker_groups {file_name}: {speaker_groups[file_name]}")
+    #     print(f"start={turn.start:.3f}s stop={turn.end:.3f}s speaker_{speaker}")
+
+    save_groups_json(sample_groups, speaker_groups)
+    audio_segmentation(input_file, speaker_groups)
     print(str(speaker_groups))
     return str(speaker_groups)


-def audioSegmentation(input_file, speaker_groups_dict):
+def audio_segmentation(input_file, speaker_groups_dict):
     audioSegment = AudioSegment.from_wav(input_file)
     for speaker in speaker_groups_dict:
         time = speaker_groups_dict[speaker]
@@ -45,7 +53,7 @@ def audioSegmentation(input_file, speaker_groups_dict):
         print(f"group {speaker}: {time[0]*1000}--{time[1]*1000}")


-def saveGroupsJson(sample_groups_list: list, speaker_groups_dict: dict):
+def save_groups_json(sample_groups_list: list, speaker_groups_dict: dict):
     with open("sample_groups.json", "w") as json_file_sample:
         json.dump(sample_groups_list, json_file_sample)
     with open("speaker_groups.json", "w") as json_file_speaker: