pritamdeka committed on
Commit
a62c4d4
·
verified ·
1 Parent(s): d69306e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -15
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import whisper
3
  from transformers import pipeline
4
- import librosa
5
- import os
6
 
7
  # Load Whisper model
8
  whisper_model = whisper.load_model("base")
@@ -18,20 +17,18 @@ def get_summarizer(model_name):
18
  else:
19
  return None
20
 
21
- # Function to transcribe audio file using Whisper
22
- def transcribe_audio(model_size, audio_path):
23
- # Debug: Check if the file path is correctly passed
24
- print(f"Audio file path received: {audio_path}")
25
-
26
- if audio_path is None or not os.path.exists(audio_path):
27
- return "No audio file provided or file path invalid."
 
28
 
29
  # Load the selected Whisper model
30
  model = whisper.load_model(model_size)
31
 
32
- # Load and convert audio using librosa
33
- audio_data, sample_rate = librosa.load(audio_path, sr=16000)
34
-
35
  # Transcribe the audio file
36
  result = model.transcribe(audio_data)
37
  transcription = result['text']
@@ -52,9 +49,9 @@ def summarize_text(transcription, model_name):
52
  return "Invalid summarization model selected."
53
 
54
  # Create a Gradio interface that combines transcription and summarization
55
- def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
56
  # Step 1: Transcribe the audio using Whisper
57
- transcription = transcribe_audio(model_size, audio_path)
58
 
59
  # Step 2: Summarize the transcribed text using the chosen summarizer model
60
  summary = summarize_text(transcription, summarizer_model)
@@ -67,7 +64,7 @@ iface = gr.Interface(
67
  inputs=[
68
  gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"), # Whisper model selection
69
  gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"), # Summarizer model selection
70
- gr.Audio(type="filepath") # Audio upload
71
  ],
72
  outputs=[
73
  gr.Textbox(label="Transcription"), # Output for the transcribed text
 
1
  import gradio as gr
2
  import whisper
3
  from transformers import pipeline
4
+ import numpy as np
 
5
 
6
  # Load Whisper model
7
  whisper_model = whisper.load_model("base")
 
17
  else:
18
  return None
19
 
20
+ # Function to transcribe raw audio data using Whisper
21
+ def transcribe_audio(model_size, audio):
22
+ if audio is None:
23
+ return "No audio file provided."
24
+
25
+ # Convert the input audio (which is a tuple) into the format Whisper expects
26
+ audio_data = np.array(audio[1]) # audio[1] is the raw audio data
27
+ sample_rate = 16000 # Whisper expects a sample rate of 16kHz
28
 
29
  # Load the selected Whisper model
30
  model = whisper.load_model(model_size)
31
 
 
 
 
32
  # Transcribe the audio file
33
  result = model.transcribe(audio_data)
34
  transcription = result['text']
 
49
  return "Invalid summarization model selected."
50
 
51
  # Create a Gradio interface that combines transcription and summarization
52
+ def combined_transcription_and_summarization(model_size, summarizer_model, audio):
53
  # Step 1: Transcribe the audio using Whisper
54
+ transcription = transcribe_audio(model_size, audio)
55
 
56
  # Step 2: Summarize the transcribed text using the chosen summarizer model
57
  summary = summarize_text(transcription, summarizer_model)
 
64
  inputs=[
65
  gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"), # Whisper model selection
66
  gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"), # Summarizer model selection
67
+ gr.Audio(type="numpy") # This will pass raw audio data as a numpy array
68
  ],
69
  outputs=[
70
  gr.Textbox(label="Transcription"), # Output for the transcribed text