Add Darija transcription and topic extraction app
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
|
4 |
import soundfile as sf
|
|
|
5 |
|
6 |
# Load models
|
7 |
# Transcription model for Moroccan Darija
|
@@ -11,10 +12,17 @@ transcription_model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xl
|
|
11 |
# Summarization model
|
12 |
summarizer = pipeline("summarization", model="t5-small")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Function to transcribe audio using Wav2Vec2
|
15 |
def transcribe_audio(audio_path):
|
16 |
# Load and preprocess audio
|
17 |
-
audio_input, sample_rate =
|
18 |
inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
|
19 |
|
20 |
# Get predictions
|
@@ -31,8 +39,13 @@ def transcribe_and_summarize(audio_file):
|
|
31 |
# Transcription
|
32 |
transcription = transcribe_audio(audio_file)
|
33 |
|
34 |
-
#
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
return transcription, summary
|
37 |
|
38 |
# Gradio Interface
|
|
|
2 |
import torch
|
3 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
|
4 |
import soundfile as sf
|
5 |
+
import librosa
|
6 |
|
7 |
# Load models
|
8 |
# Transcription model for Moroccan Darija
|
|
|
12 |
# Summarization model
|
13 |
summarizer = pipeline("summarization", model="t5-small")
|
14 |
|
15 |
+
# Function to resample audio to 16kHz if necessary
|
16 |
+
def resample_audio(audio_path, target_sr=16000):
    """Load an audio file and return its samples at ``target_sr``.

    The file is read with ``librosa.load`` using ``sr=None`` so that the
    file's own sampling rate is kept on load; ``librosa.resample`` is then
    applied only when that rate differs from ``target_sr``.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        target_sr: Desired sampling rate in Hz (defaults to 16000).

    Returns:
        A ``(samples, target_sr)`` tuple.
    """
    samples, native_sr = librosa.load(audio_path, sr=None)

    # Already at the requested rate: hand the samples back untouched.
    if native_sr == target_sr:
        return samples, target_sr

    converted = librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)
    return converted, target_sr
|
21 |
+
|
22 |
# Function to transcribe audio using Wav2Vec2
|
23 |
def transcribe_audio(audio_path):
|
24 |
# Load and preprocess audio
|
25 |
+
audio_input, sample_rate = resample_audio(audio_path)
|
26 |
inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
|
27 |
|
28 |
# Get predictions
|
|
|
39 |
# Transcription
|
40 |
transcription = transcribe_audio(audio_file)
|
41 |
|
42 |
+
# Check if transcription is long enough for summarization
|
43 |
+
if len(transcription.split()) < 10: # Check if the transcription is too short for summarization
|
44 |
+
summary = "Transcription is too short for summarization."
|
45 |
+
else:
|
46 |
+
# Summarization
|
47 |
+
summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
|
48 |
+
|
49 |
return transcription, summary
|
50 |
|
51 |
# Gradio Interface
|