JabriA committed on
Commit
c3c6112
·
1 Parent(s): 8663026

Add Darija transcription and topic extraction app

Browse files
Files changed (1) hide show
  1. app.py +16 -3
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import torch
3
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
4
  import soundfile as sf
 
5
 
6
  # Load models
7
  # Transcription model for Moroccan Darija
@@ -11,10 +12,17 @@ transcription_model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xl
11
  # Summarization model
12
  summarizer = pipeline("summarization", model="t5-small")
13
 
 
 
 
 
 
 
 
14
  # Function to transcribe audio using Wav2Vec2
15
  def transcribe_audio(audio_path):
16
  # Load and preprocess audio
17
- audio_input, sample_rate = sf.read(audio_path)
18
  inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
19
 
20
  # Get predictions
@@ -31,8 +39,13 @@ def transcribe_and_summarize(audio_file):
31
  # Transcription
32
  transcription = transcribe_audio(audio_file)
33
 
34
- # Summarization
35
- summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
 
 
 
 
 
36
  return transcription, summary
37
 
38
  # Gradio Interface
 
2
  import torch
3
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
4
  import soundfile as sf
5
+ import librosa
6
 
7
  # Load models
8
  # Transcription model for Moroccan Darija
 
12
  # Summarization model
13
  summarizer = pipeline("summarization", model="t5-small")
14
 
15
+ # Function to resample audio to 16kHz if necessary
16
+ def resample_audio(audio_path, target_sr=16000):
17
+ audio_input, original_sr = librosa.load(audio_path, sr=None) # Load audio with original sampling rate
18
+ if original_sr != target_sr:
19
+ audio_input = librosa.resample(audio_input, orig_sr=original_sr, target_sr=target_sr) # Resample to 16kHz
20
+ return audio_input, target_sr
21
+
22
  # Function to transcribe audio using Wav2Vec2
23
  def transcribe_audio(audio_path):
24
  # Load and preprocess audio
25
+ audio_input, sample_rate = resample_audio(audio_path)
26
  inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
27
 
28
  # Get predictions
 
39
  # Transcription
40
  transcription = transcribe_audio(audio_file)
41
 
42
+ # Check if transcription is long enough for summarization
43
+ if len(transcription.split()) < 10: # Check if the transcription is too short for summarization
44
+ summary = "Transcription is too short for summarization."
45
+ else:
46
+ # Summarization
47
+ summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
48
+
49
  return transcription, summary
50
 
51
  # Gradio Interface