kadirnar committed on
Commit
038c89a
·
1 Parent(s): 3f780ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -17
app.py CHANGED
@@ -27,7 +27,7 @@ def youtube_url_to_text(url, model_id, language_choice):
27
  return transcript, video_path
28
 
29
 
30
- def speaker_diarization(url, model_id, device, num_speakers, min_speaker, max_speaker):
31
  """
32
  Main function that downloads and converts a video to MP3 format, performs speech-to-text conversion using
33
  a specified model, and returns the transcript along with the video path.
@@ -47,12 +47,11 @@ def speaker_diarization(url, model_id, device, num_speakers, min_speaker, max_sp
47
  diarizer_model="pyannote/speaker-diarization",
48
  use_auth_token=False,
49
  chunk_length_s=30,
50
- device=device,
51
  )
52
 
53
  audio_path = download_and_convert_to_mp3(url)
54
- output_text = pipeline(
55
- audio_path, num_speakers=num_speakers, min_speaker=min_speaker, max_speaker=max_speaker)
56
  dialogue = format_speech_to_dialogue(output_text)
57
  return dialogue, audio_path
58
 
@@ -140,11 +139,7 @@ def speaker_diarization_app():
140
  value="openai/whisper-large-v3",
141
  label="Whisper Model",
142
  )
143
- device = gr.Dropdown(
144
- choices=["cpu", "cuda", "mps"],
145
- value="cuda",
146
- label="Device",
147
- )
148
  num_speakers = gr.Number(value=2, label="Number of Speakers")
149
  min_speaker = gr.Number(value=1, label="Minimum Number of Speakers")
150
  max_speaker = gr.Number(value=2, label="Maximum Number of Speakers")
@@ -171,20 +166,12 @@ def speaker_diarization_app():
171
  [
172
  "https://www.youtube.com/shorts/o8PgLUgte2k",
173
  "openai/whisper-large-v3",
174
- "cuda",
175
- 2,
176
- 1,
177
- 2,
178
  ],
179
  ],
180
  fn=speaker_diarization,
181
  inputs=[
182
  youtube_url_path,
183
  whisper_model_id,
184
- device,
185
- num_speakers,
186
- min_speaker,
187
- max_speaker,
188
  ],
189
  outputs=[output_text, output_audio],
190
  cache_examples=True,
 
27
  return transcript, video_path
28
 
29
 
30
+ def speaker_diarization(url, model_id):
31
  """
32
  Main function that downloads and converts a video to MP3 format, performs speech-to-text conversion using
33
  a specified model, and returns the transcript along with the video path.
 
47
  diarizer_model="pyannote/speaker-diarization",
48
  use_auth_token=False,
49
  chunk_length_s=30,
50
+ device="cuda",
51
  )
52
 
53
  audio_path = download_and_convert_to_mp3(url)
54
+ output_text = pipeline(audio_path)
 
55
  dialogue = format_speech_to_dialogue(output_text)
56
  return dialogue, audio_path
57
 
 
139
  value="openai/whisper-large-v3",
140
  label="Whisper Model",
141
  )
142
+
 
 
 
 
143
  num_speakers = gr.Number(value=2, label="Number of Speakers")
144
  min_speaker = gr.Number(value=1, label="Minimum Number of Speakers")
145
  max_speaker = gr.Number(value=2, label="Maximum Number of Speakers")
 
166
  [
167
  "https://www.youtube.com/shorts/o8PgLUgte2k",
168
  "openai/whisper-large-v3",
 
 
 
 
169
  ],
170
  ],
171
  fn=speaker_diarization,
172
  inputs=[
173
  youtube_url_path,
174
  whisper_model_id,
 
 
 
 
175
  ],
176
  outputs=[output_text, output_audio],
177
  cache_examples=True,