pritamdeka committed on
Commit
d9cee8f
·
verified ·
1 Parent(s): a62c4d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -63
app.py CHANGED
@@ -1,78 +1,36 @@
1
  import gradio as gr
2
  import whisper
3
- from transformers import pipeline
4
- import numpy as np
5
 
6
  # Load Whisper model
7
- whisper_model = whisper.load_model("base")
8
 
9
# Map the UI's model labels to Hugging Face summarization pipelines.
def get_summarizer(model_name):
    """Return a summarization pipeline for the given dropdown label.

    Returns None when *model_name* is not one of the supported choices.
    """
    checkpoints = {
        "BART (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
        "T5 (t5-small)": "t5-small",
        "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
    }
    checkpoint = checkpoints.get(model_name)
    if checkpoint is None:
        return None
    return pipeline("summarization", model=checkpoint)
19
-
20
# Function to transcribe raw audio data using Whisper
def transcribe_audio(model_size, audio):
    """Transcribe a Gradio numpy audio input with a Whisper model.

    Parameters
    ----------
    model_size : str
        Whisper checkpoint name ("tiny", "base", "small", "medium", "large").
    audio : tuple | None
        Gradio ``type="numpy"`` audio: ``(sample_rate, samples)``. Samples
        are typically int16 PCM and may be mono or stereo.

    Returns
    -------
    str
        The transcribed text, or an explanatory message when no audio is given.
    """
    if audio is None:
        return "No audio file provided."

    # Gradio delivers (sample_rate, samples); the original code discarded the
    # real sample rate and fed raw int PCM to Whisper, which expects mono
    # float32 in [-1, 1] sampled at 16 kHz.
    sample_rate, samples = audio
    audio_data = np.asarray(samples)

    if np.issubdtype(audio_data.dtype, np.integer):
        # Normalize integer PCM by its dtype's full-scale value.
        scale = float(np.iinfo(audio_data.dtype).max)
        audio_data = audio_data.astype(np.float32) / scale
    else:
        audio_data = audio_data.astype(np.float32)

    if audio_data.ndim > 1:
        # Downmix stereo (or multi-channel) to mono.
        audio_data = audio_data.mean(axis=1)

    target_rate = 16000  # Whisper's expected sample rate
    if sample_rate != target_rate and audio_data.size > 0:
        # Linear resampling is adequate for speech transcription here.
        duration = audio_data.shape[0] / float(sample_rate)
        n_target = int(duration * target_rate)
        audio_data = np.interp(
            np.linspace(0.0, duration, n_target, endpoint=False),
            np.linspace(0.0, duration, audio_data.shape[0], endpoint=False),
            audio_data,
        ).astype(np.float32)

    # Load the selected Whisper checkpoint. NOTE(review): reloading on every
    # call is slow but honors the per-request model choice; a cache keyed by
    # model_size would be a natural follow-up.
    model = whisper.load_model(model_size)

    # Transcribe the prepared audio buffer.
    result = model.transcribe(audio_data)
    return result['text']
37
 
38
# Function to summarize the transcribed text
def summarize_text(transcription, model_name):
    """Summarize *transcription* with the summarizer selected in the UI.

    Returns a human-readable message when the text is empty or the model
    label is unknown.
    """
    if not transcription.strip():
        return "No text to summarize."

    summarizer = get_summarizer(model_name)
    if not summarizer:
        return "Invalid summarization model selected."

    outputs = summarizer(
        transcription, max_length=150, min_length=30, do_sample=False
    )
    return outputs[0]['summary_text']
50
-
51
# Pipeline the two steps behind a single Gradio callback.
def combined_transcription_and_summarization(model_size, summarizer_model, audio):
    """Transcribe *audio* with Whisper, then summarize the transcription.

    Returns a (transcription, summary) pair for the two output textboxes.
    """
    text = transcribe_audio(model_size, audio)
    return text, summarize_text(text, summarizer_model)
60
-
61
# Gradio UI: pick a Whisper size and a summarizer, upload audio, and get
# back both the transcription and its summary.
whisper_choices = ["tiny", "base", "small", "medium", "large"]
summarizer_choices = [
    "BART (facebook/bart-large-cnn)",
    "T5 (t5-small)",
    "Pegasus (google/pegasus-xsum)",
]

iface = gr.Interface(
    fn=combined_transcription_and_summarization,
    inputs=[
        gr.Dropdown(
            label="Choose Whisper Model",
            choices=whisper_choices,
            value="base",
        ),
        gr.Dropdown(
            label="Choose Summarizer Model",
            choices=summarizer_choices,
            value="BART (facebook/bart-large-cnn)",
        ),
        gr.Audio(type="numpy"),  # hands the callback (sample_rate, samples)
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
    title="Whisper Audio Transcription and Summarization",
    description="Upload an audio file, choose a Whisper model for transcription, and a summarization model to summarize the transcription.",
)

# Start the web app.
iface.launch()
 
1
  import gradio as gr
2
  import whisper
3
+ import os
 
4
 
5
# Load Whisper model once at import time so the Gradio handler can reuse it
# across requests ("base" trades accuracy for speed among the checkpoints).
model = whisper.load_model("base")
7
 
8
# Function to transcribe an uploaded audio file using Whisper
def transcribe_audio(audio_file):
    """Transcribe the audio at *audio_file* with the preloaded Whisper model.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path supplied by ``gr.Audio(type="filepath")``; None when
        the user submits without uploading anything.

    Returns
    -------
    str
        The transcription, or an explanatory message on missing/bad input.
    """
    if audio_file is None:
        return "No audio file provided."

    # Guard against a stale or inaccessible temp path so the failure surfaces
    # as a readable message instead of an error deep inside Whisper/ffmpeg.
    # (Replaces the previous stdout debug print of the path.)
    if not os.path.exists(audio_file):
        return "The audio file does not exist or is inaccessible."

    # Load and transcribe the audio file with the module-level model.
    result = model.transcribe(audio_file)
    return result['text']
25
 
26
# Gradio UI: upload an audio file, receive the Whisper transcription as text.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),  # the callback receives a temp-file path
    outputs="text",
    title="Whisper Audio Transcription",
    description="Upload an audio file and get the transcription.",
)
34
 
35
# Launch the Gradio interface with a shareable public link — required on
# Colab, where localhost is not reachable from the user's browser.
iface.launch(share=True)