Add Darija transcription and topic extraction app
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
|
| 4 |
import soundfile as sf
|
|
|
|
| 5 |
|
| 6 |
# Load models
|
| 7 |
# Transcription model for Moroccan Darija
|
|
@@ -11,10 +12,17 @@ transcription_model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xl
|
|
| 11 |
# Summarization model
|
| 12 |
summarizer = pipeline("summarization", model="t5-small")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Function to transcribe audio using Wav2Vec2
|
| 15 |
def transcribe_audio(audio_path):
|
| 16 |
# Load and preprocess audio
|
| 17 |
-
audio_input, sample_rate =
|
| 18 |
inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
|
| 19 |
|
| 20 |
# Get predictions
|
|
@@ -31,8 +39,13 @@ def transcribe_and_summarize(audio_file):
|
|
| 31 |
# Transcription
|
| 32 |
transcription = transcribe_audio(audio_file)
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return transcription, summary
|
| 37 |
|
| 38 |
# Gradio Interface
|
|
|
|
| 2 |
import torch
|
| 3 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
|
| 4 |
import soundfile as sf
|
| 5 |
+
import librosa
|
| 6 |
|
| 7 |
# Load models
|
| 8 |
# Transcription model for Moroccan Darija
|
|
|
|
| 12 |
# Summarization model
|
| 13 |
summarizer = pipeline("summarization", model="t5-small")
|
| 14 |
|
| 15 |
+
# Function to resample audio to 16kHz if necessary
|
| 16 |
+
def resample_audio(audio_path, target_sr=16000):
    """Load an audio file and return its samples at ``target_sr``.

    The file is read with librosa at its native sampling rate (``sr=None``);
    the samples go through ``librosa.resample`` only when that rate differs
    from ``target_sr``.

    Args:
        audio_path: Audio file location handed to ``librosa.load``.
        target_sr: Desired sampling rate in Hz (16000 by default).

    Returns:
        A ``(samples, target_sr)`` tuple.
    """
    waveform, native_sr = librosa.load(audio_path, sr=None)
    if native_sr == target_sr:
        # Already at the requested rate; no conversion needed.
        return waveform, target_sr
    converted = librosa.resample(waveform, orig_sr=native_sr, target_sr=target_sr)
    return converted, target_sr
|
| 21 |
+
|
| 22 |
# Function to transcribe audio using Wav2Vec2
|
| 23 |
def transcribe_audio(audio_path):
|
| 24 |
# Load and preprocess audio
|
| 25 |
+
audio_input, sample_rate = resample_audio(audio_path)
|
| 26 |
inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
|
| 27 |
|
| 28 |
# Get predictions
|
|
|
|
| 39 |
# Transcription
|
| 40 |
transcription = transcribe_audio(audio_file)
|
| 41 |
|
| 42 |
+
# Check if transcription is long enough for summarization
|
| 43 |
+
if len(transcription.split()) < 10: # Check if the transcription is too short for summarization
|
| 44 |
+
summary = "Transcription is too short for summarization."
|
| 45 |
+
else:
|
| 46 |
+
# Summarization
|
| 47 |
+
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
| 48 |
+
|
| 49 |
return transcription, summary
|
| 50 |
|
| 51 |
# Gradio Interface
|