pritamdeka committed on
Commit
4206062
·
verified ·
1 Parent(s): 56925b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -19
app.py CHANGED
@@ -1,36 +1,67 @@
1
  import gradio as gr
 
2
  import whisper
3
- import os
 
4
 
5
- # Load Whisper model
6
- model = whisper.load_model("base")
 
7
 
8
- # Function to transcribe audio file using Whisper
9
- def transcribe_audio(audio_file):
10
- # Check if the audio file exists and print the file path for debugging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if audio_file is None:
12
  return "No audio file provided."
13
 
14
- # Debugging: Print the file path to check if Gradio passes the file path correctly
15
- print(f"Audio file path: {audio_file}")
16
-
17
  if not os.path.exists(audio_file):
18
  return "The audio file does not exist or is inaccessible."
 
 
 
19
 
20
- # Load and transcribe the audio file
21
- result = model.transcribe(audio_file)
22
- transcription = result['text']
 
 
 
23
 
24
  return transcription
25
 
26
- # Gradio interface for transcription
27
  iface = gr.Interface(
28
- fn=transcribe_audio, # Function to process audio file
29
- inputs=gr.Audio(type="filepath"), # Audio upload, pass file path
30
- outputs="text", # Output the transcription as text
31
- title="Whisper Audio Transcription",
32
- description="Upload an audio file and get the transcription."
33
  )
34
 
35
- # Launch the Gradio interface with a shareable link (required for Colab)
36
  iface.launch()
 
1
import os

import gradio as gr
import librosa
import torch
import whisper
from transformers import pipeline
6
 
7
# Load a compact Whisper checkpoint. "tiny" keeps transcription fast on CPU
# at some cost in accuracy; swap in "base" or "small" if quality matters more.
model = whisper.load_model("tiny")
13
+
14
def chunk_audio(audio_file, chunk_size=5):
    """Split an audio file into consecutive ``chunk_size``-second segments.

    Args:
        audio_file: Path to an audio file on disk.
        chunk_size: Length of each segment in seconds; the final segment may
            be shorter when the total duration is not an exact multiple.

    Returns:
        Tuple ``(audio_chunks, sr)`` where ``audio_chunks`` is a list of 1-D
        waveform arrays and ``sr`` is the sample rate used for loading.
    """
    # Resample to 16 kHz mono — the rate the Whisper models expect.
    audio, sr = librosa.load(audio_file, sr=16000)

    # Step through the waveform in fixed-size windows. Slicing past the end
    # of the array automatically yields the (possibly shorter) final chunk,
    # so no separate remainder handling is needed.
    samples_per_chunk = int(chunk_size * sr)
    audio_chunks = [
        audio[start:start + samples_per_chunk]
        for start in range(0, len(audio), samples_per_chunk)
    ]

    return audio_chunks, sr
35
+
36
def transcribe_audio_in_chunks(audio_file):
    """Transcribe an audio file with Whisper, 5 seconds at a time.

    Args:
        audio_file: Path to the uploaded audio file, or ``None`` when the
            user submitted nothing.

    Returns:
        A string with one ``"Chunk N: ..."`` line per 5-second segment, or
        an error message when the file is missing or inaccessible.
    """
    # Guard clauses: validate the input before touching the model.
    if audio_file is None:
        return "No audio file provided."

    if not os.path.exists(audio_file):
        return "The audio file does not exist or is inaccessible."

    # Split the waveform into 5-second segments so long recordings are
    # transcribed incrementally instead of in one large pass.
    chunks, sr = chunk_audio(audio_file, chunk_size=5)

    # Transcribe each segment and join once at the end — avoids quadratic
    # string concatenation on long recordings.
    lines = []
    for i, chunk in enumerate(chunks):
        result = model.transcribe(chunk)
        lines.append(f"Chunk {i + 1}: {result['text']}\n")

    return "".join(lines)
56
 
57
# Web UI: a single audio upload wired to the chunked transcription function.
interface_config = dict(
    fn=transcribe_audio_in_chunks,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper Audio Transcription with Chunking",
    description="Upload an audio file, and Whisper will transcribe it in real-time as chunks.",
)
iface = gr.Interface(**interface_config)

# Start the Gradio server (pass share=True when running in Colab).
iface.launch()