# MyIVR / app.py
# Commit c3c6112 (JabriA): Add Darija transcription and topic extraction app
import gradio as gr
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
import soundfile as sf
import librosa
# Load models
# The feature processor and the CTC acoustic model are both loaded from the
# same Moroccan Darija Wav2Vec2 checkpoint, so the identifier is named once.
_DARIJA_ASR_CHECKPOINT = "boumehdi/wav2vec2-large-xlsr-moroccan-darija"
processor = Wav2Vec2Processor.from_pretrained(_DARIJA_ASR_CHECKPOINT)
transcription_model = Wav2Vec2ForCTC.from_pretrained(_DARIJA_ASR_CHECKPOINT)
# Summarization model
summarizer = pipeline(task="summarization", model="t5-small")
# Function to resample audio to 16kHz if necessary
def resample_audio(audio_path, target_sr=16000):
    """Load an audio file and return its samples at ``target_sr``.

    The file is first read at its own sampling rate (``sr=None``); it is only
    passed through ``librosa.resample`` when that rate differs from the
    requested one.

    Args:
        audio_path: Path of the audio file to load.
        target_sr: Sampling rate, in Hz, the returned samples should have.

    Returns:
        A ``(samples, target_sr)`` tuple.
    """
    samples, native_sr = librosa.load(audio_path, sr=None)
    if native_sr == target_sr:
        # Already at the requested rate: nothing to convert.
        return samples, target_sr
    converted = librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)
    return converted, target_sr
# Function to transcribe audio using Wav2Vec2
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with the Wav2Vec2 CTC model.

    The audio is loaded through ``resample_audio``, converted to model inputs
    by the module-level ``processor``, and decoded greedily: the most likely
    token is taken at every frame and the resulting ids are turned back into
    text by ``processor.batch_decode``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The decoded transcription of the (single) input as a string.
    """
    waveform, rate = resample_audio(audio_path)
    features = processor(waveform, sampling_rate=rate, return_tensors="pt", padding=True)
    # Inference only: skip gradient tracking for the forward pass.
    with torch.no_grad():
        model_output = transcription_model(**features)
    token_ids = model_output.logits.argmax(dim=-1)
    # A single file was submitted, so the batch holds exactly one transcript.
    return processor.batch_decode(token_ids)[0]
# Function to transcribe and summarize
def transcribe_and_summarize(audio_file):
    """Transcribe an audio file and summarize the resulting text.

    Args:
        audio_file: Path of the audio file to process. May be ``None`` or
            empty when the form is submitted without any audio.

    Returns:
        A ``(transcription, summary)`` tuple of strings. When no audio is
        supplied, the first element carries an explanatory message and the
        summary is empty. When the transcription has fewer than 10
        whitespace-separated words, the summary is replaced by a notice that
        the text is too short to summarize.
    """
    # Guard against a submit with no upload; otherwise the missing path is
    # passed to the audio loader and surfaces as an opaque error.
    if not audio_file:
        return "No audio file was provided.", ""

    transcription = transcribe_audio(audio_file)

    # Very short transcripts are not worth summarizing (min_length is 10).
    if len(transcription.split()) < 10:
        return transcription, "Transcription is too short for summarization."

    result = summarizer(transcription, max_length=50, min_length=10, do_sample=False)
    return transcription, result[0]["summary_text"]
# Gradio Interface
# One audio upload (handed to the callback as a file path) in, two text boxes out.
inputs = gr.Audio(label="Upload your audio file", type="filepath")
outputs = [gr.Textbox(label=caption) for caption in ("Transcription", "Summary")]
app = gr.Interface(
    title="Moroccan Darija Audio Transcription and Summarization",
    description="Upload an audio file in Moroccan Darija to get its transcription and a summarized version of the content.",
    fn=transcribe_and_summarize,
    inputs=inputs,
    outputs=outputs,
)
# Launch the app
if __name__ == "__main__":
    app.launch()