Spaces:

hackergeek98
/

tinyyy

Sleeping

hackergeek98 committed on Mar 23

Commit

994674b

verified ·

1 Parent(s): 322b20d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
 # Load the fine-tuned Whisper model and processor
 model_name = "hackergeek98/tinyyyy_whisper"
@@ -12,9 +13,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 # Define the ASR function
-def transcribe_audio(audio):
-    # Load audio file
-    sampling_rate, audio_data = audio
     # Preprocess the audio
     inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
@@ -30,10 +31,10 @@ def transcribe_audio(audio):
 # Create the Gradio interface
 interface = gr.Interface(
     fn=transcribe_audio,  # Function to call
-    inputs=gr.Audio(type="numpy"),  # Input: Upload audio file
     outputs=gr.Textbox(label="Transcription"),  # Output: Display transcription
     title="Whisper ASR: Tinyyyy Model",
-    description="Upload an audio file, and the fine-tuned Whisper model will transcribe it.",
 )
 # Launch the app

 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
+import librosa
 # Load the fine-tuned Whisper model and processor
 model_name = "hackergeek98/tinyyyy_whisper"
 model.to(device)
 # Define the ASR function
+def transcribe_audio(audio_file):
+    # Load audio file using librosa (supports multiple formats)
+    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)  # Resample to 16kHz
     # Preprocess the audio
     inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
 # Create the Gradio interface
 interface = gr.Interface(
     fn=transcribe_audio,  # Function to call
+    inputs=gr.Audio(type="filepath"),  # Input: Upload audio file (any format)
     outputs=gr.Textbox(label="Transcription"),  # Output: Display transcription
     title="Whisper ASR: Tinyyyy Model",
+    description="Upload an audio file (e.g., .wav, .mp3, .ogg), and the fine-tuned Whisper model will transcribe it.",
 )
 # Launch the app