"""Gradio app: transcribe Moroccan Darija audio and summarize the transcript.

Pipeline: a Wav2Vec2 CTC model performs speech-to-text, then a t5-small
summarization pipeline condenses the transcript.
"""

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline

# Wav2Vec2 XLSR models are trained on 16 kHz audio; any other input rate
# must be resampled before inference or transcription quality collapses.
TARGET_SAMPLE_RATE = 16_000

# Load models
# Transcription model for Moroccan Darija
processor = Wav2Vec2Processor.from_pretrained(
    "achrafkhannoussi/Wav2Vec2-Large-XLSR-53-Moroccan-Darija"
)
transcription_model = Wav2Vec2ForCTC.from_pretrained(
    "achrafkhannoussi/Wav2Vec2-Large-XLSR-53-Moroccan-Darija"
)
transcription_model.eval()  # inference only: disable dropout

# Summarization model (generic t5-small; applied to the raw transcript)
summarizer = pipeline("summarization", model="t5-small")


def _prepare_audio(audio_input, sample_rate):
    """Return *audio_input* as mono float32 at TARGET_SAMPLE_RATE.

    soundfile returns shape (frames,) for mono or (frames, channels) for
    multi-channel files; Wav2Vec2 expects a 1-D 16 kHz signal.
    """
    if audio_input.ndim > 1:
        # Downmix multi-channel audio by averaging the channels.
        audio_input = audio_input.mean(axis=1)
    if sample_rate != TARGET_SAMPLE_RATE:
        # Linear-interpolation resample: adequate for speech recognition
        # without pulling in librosa/torchaudio as a new dependency.
        duration = audio_input.shape[0] / sample_rate
        target_len = int(round(duration * TARGET_SAMPLE_RATE))
        old_t = np.linspace(0.0, duration, num=audio_input.shape[0], endpoint=False)
        new_t = np.linspace(0.0, duration, num=target_len, endpoint=False)
        audio_input = np.interp(new_t, old_t, audio_input)
    return audio_input.astype(np.float32)


# Function to transcribe audio using Wav2Vec2
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    The file is read with soundfile, normalized to mono 16 kHz, and decoded
    greedily (argmax over CTC logits).
    """
    # Load and preprocess audio
    audio_input, sample_rate = sf.read(audio_path)
    audio_input = _prepare_audio(audio_input, sample_rate)
    inputs = processor(
        audio_input,
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
        padding=True,
    )
    # Get predictions
    with torch.no_grad():
        logits = transcription_model(**inputs).logits
    # Decode predictions: most likely token at each frame (greedy CTC)
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription


# Function to transcribe and summarize
def transcribe_and_summarize(audio_file):
    """Gradio handler: return (transcription, summary) for an uploaded file.

    Raises gr.Error when no file was provided so the UI shows a clear
    message instead of a stack trace.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file first.")
    # Transcription
    transcription = transcribe_audio(audio_file)
    if not transcription.strip():
        # Nothing recognized: skip summarization (t5 misbehaves on empty input).
        return transcription, "No speech detected; nothing to summarize."
    # Summarization. truncation=True keeps transcripts longer than
    # t5-small's 512-token window from crashing the pipeline.
    summary = summarizer(
        transcription,
        max_length=50,
        min_length=10,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]
    return transcription, summary


# Gradio Interface: one audio input, two text outputs
inputs = gr.Audio(type="filepath", label="Upload your audio file")
outputs = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Summary"),
]

app = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=inputs,
    outputs=outputs,
    title="Moroccan Darija Audio Transcription and Summarization",
    description=(
        "Upload an audio file in Moroccan Darija to get its transcription "
        "and a summarized version of the content."
    ),
)

# Launch the app
if __name__ == "__main__":
    app.launch()