# NOTE: Hugging Face Spaces file-viewer residue (status, commit hashes,
# line-number gutter) removed so this file parses as Python.
import gradio as gr
import torch
from transformers import pipeline
import librosa
import soundfile as sf
import spaces
import os
def split_audio(audio_data, sr, chunk_duration=30):
    """Slice an audio signal into consecutive chunks.

    Args:
        audio_data: 1-D sequence of samples (e.g. a numpy array).
        sr: Sample rate in Hz.
        chunk_duration: Maximum chunk length in seconds (default 30).

    Returns:
        List of slices of ``audio_data``; the last chunk may be shorter.
    """
    step = int(chunk_duration * sr)  # samples per chunk
    return [audio_data[offset:offset + step]
            for offset in range(0, len(audio_data), step)]
def transcribe_long_audio(audio_input, transcriber, chunk_duration=30):
    """Transcribe long audio by splitting it into smaller chunks.

    Args:
        audio_input: Either a ``(sample_rate, data)`` tuple as produced by
            ``gr.Audio(type="numpy")`` for recorded audio, or a filesystem
            path (str) for uploaded audio.
        transcriber: ASR pipeline; called per chunk, must return a dict
            with a ``"text"`` key.
        chunk_duration: Chunk length in seconds (default 30).

    Returns:
        The concatenated transcription, or an error message string on failure.
    """
    try:
        # Debugging input type and format
        print(f"Audio input type: {type(audio_input)}")

        if isinstance(audio_input, tuple):  # Recorded audio
            print("Processing recorded audio...")
            # BUGFIX: gr.Audio(type="numpy") yields (sample_rate, data).
            # The previous code unpacked these in reverse order, passing the
            # integer sample rate to sf.write as the audio data.
            sr, audio_data = audio_input
            temp_path = "recorded_audio.wav"
            sf.write(temp_path, audio_data, sr)  # Save recording to a temp file
        elif isinstance(audio_input, str):  # Uploaded file path
            print("Processing uploaded audio...")
            temp_path = audio_input  # Use the file path directly
        else:
            raise ValueError("Unsupported audio input format.")

        try:
            # Process the audio file (recorded or uploaded)
            audio_data, sr = librosa.load(temp_path, sr=None)
            chunks = split_audio(audio_data, sr, chunk_duration)

            transcriptions = []
            for i, chunk in enumerate(chunks):
                chunk_path = f"temp_chunk_{i}.wav"
                sf.write(chunk_path, chunk, sr)  # Save chunk as WAV
                try:
                    transcriptions.append(transcriber(chunk_path)["text"])
                finally:
                    # Remove the chunk file even if transcription fails,
                    # so failed runs don't leave temp_chunk_*.wav behind.
                    os.remove(chunk_path)
        finally:
            # Always clean up the temporary recording (uploads are left alone).
            if temp_path == "recorded_audio.wav" and os.path.exists(temp_path):
                os.remove(temp_path)

        return " ".join(transcriptions)
    except Exception as e:
        # Top-level boundary: report the error to the UI instead of crashing.
        print(f"Error in transcribe_long_audio: {e}")
        return f"Error processing audio: {e}"
def main():
    """Load the ASR/summarization models, build the Gradio UI, and launch it."""
    # Use GPU 0 when available, otherwise CPU (-1) for the ASR pipeline.
    device = 0 if torch.cuda.is_available() else -1
    try:
        transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    except Exception as e:
        print(f"Error loading models: {e}")
        raise

    # FIX: @spaces.GPU must decorate the function that actually performs GPU
    # inference, not main() — main() blocks forever in interface.launch(), so
    # a 3-second GPU allocation wrapped around it would expire immediately.
    # duration=120 gives the transcription + summarization pass room to finish.
    @spaces.GPU(duration=120)
    def process_audio(audio_input):
        """Transcribe the input audio and summarize the transcription."""
        try:
            transcription = transcribe_long_audio(audio_input, transcriber, chunk_duration=30)
            summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
            return transcription, summary
        except Exception as e:
            print(f"Error in process_audio: {e}")
            return f"Error processing audio: {e}", ""

    def stop_microphone():
        """Simulate stopping the microphone."""
        print("Microphone stopped.")  # Debugging for user feedback
        return "Microphone stopped. Recording session has ended."

    with gr.Blocks() as interface:
        with gr.Row():
            with gr.Column():
                # Enable recording or file upload; type="numpy" delivers
                # recordings as a (sample_rate, data) tuple.
                audio_input = gr.Audio(type="numpy", label="Record or Upload Audio")
                process_button = gr.Button("Process Audio")
                stop_button = gr.Button("Stop Recording")  # Add Stop Button
            with gr.Column():
                transcription_output = gr.Textbox(label="Full Transcription", lines=10)
                summary_output = gr.Textbox(label="Summary", lines=5)

        process_button.click(
            process_audio,
            inputs=[audio_input],
            outputs=[transcription_output, summary_output]
        )
        stop_button.click(
            stop_microphone,
            inputs=[],
            outputs=[]
        )

    # Blocks until the server stops; share=True exposes a public URL.
    interface.launch(share=True)