hackergeek98 committed on
Commit
994674b
·
verified ·
1 Parent(s): 322b20d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  import torch
 
4
 
5
  # Load the fine-tuned Whisper model and processor
6
  model_name = "hackergeek98/tinyyyy_whisper"
@@ -12,9 +13,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
12
  model.to(device)
13
 
14
  # Define the ASR function
15
- def transcribe_audio(audio):
16
- # Load audio file
17
- sampling_rate, audio_data = audio
18
 
19
  # Preprocess the audio
20
  inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
@@ -30,10 +31,10 @@ def transcribe_audio(audio):
30
  # Create the Gradio interface
31
  interface = gr.Interface(
32
  fn=transcribe_audio, # Function to call
33
- inputs=gr.Audio(type="numpy"), # Input: Upload audio file
34
  outputs=gr.Textbox(label="Transcription"), # Output: Display transcription
35
  title="Whisper ASR: Tinyyyy Model",
36
- description="Upload an audio file, and the fine-tuned Whisper model will transcribe it.",
37
  )
38
 
39
  # Launch the app
 
1
  import gradio as gr
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  import torch
4
+ import librosa
5
 
6
  # Load the fine-tuned Whisper model and processor
7
  model_name = "hackergeek98/tinyyyy_whisper"
 
13
  model.to(device)
14
 
15
  # Define the ASR function
16
+ def transcribe_audio(audio_file):
17
+ # Load audio file using librosa (supports multiple formats)
18
+ audio_data, sampling_rate = librosa.load(audio_file, sr=16000) # Resample to 16kHz
19
 
20
  # Preprocess the audio
21
  inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
 
31
  # Create the Gradio interface
32
  interface = gr.Interface(
33
  fn=transcribe_audio, # Function to call
34
+ inputs=gr.Audio(type="filepath"), # Input: Upload audio file (any format)
35
  outputs=gr.Textbox(label="Transcription"), # Output: Display transcription
36
  title="Whisper ASR: Tinyyyy Model",
37
+ description="Upload an audio file (e.g., .wav, .mp3, .ogg), and the fine-tuned Whisper model will transcribe it.",
38
  )
39
 
40
  # Launch the app