File size: 2,244 Bytes
b789fdf 6c74174 b789fdf cf75eeb 6c74174 8663026 6c74174 61e1f2a b789fdf 61e1f2a 6c74174 61e1f2a 6c74174 3785854 61e1f2a 3785854 6c74174 3785854 cf75eeb ff9c2e5 3785854 ff9c2e5 3785854 ff9c2e5 cf75eeb ff9c2e5 3785854 ff9c2e5 3785854 61e1f2a 3785854 61e1f2a cf75eeb ff9c2e5 6c74174 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import gradio as gr
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
import soundfile as sf
# --- Model setup (executed once, at import time) ---
# One checkpoint id feeds both the audio processor and the CTC model below.
_DARIJA_ASR_CHECKPOINT = "boumehdi/wav2vec2-large-xlsr-moroccan-darija"

# Speech-to-text components for Moroccan Darija.
processor = Wav2Vec2Processor.from_pretrained(_DARIJA_ASR_CHECKPOINT)
transcription_model = Wav2Vec2ForCTC.from_pretrained(_DARIJA_ASR_CHECKPOINT)

# Summarization pipeline applied to the transcription text.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
# Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe a 16 kHz audio file with the Wav2Vec2 CTC model.

    Args:
        audio_path: Path to an audio file readable by ``soundfile.read``.

    Returns:
        The decoded transcription string for the audio.

    Raises:
        ValueError: If the file is not sampled at 16 kHz.
    """
    audio_input, sample_rate = sf.read(audio_path)
    if sample_rate != 16000:
        raise ValueError("Audio must be sampled at 16kHz.")

    # soundfile returns a 2-D (frames, channels) array for multi-channel
    # files; average the channels so the processor gets one 1-D waveform.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    inputs = processor(
        audio_input,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = transcription_model(**inputs).logits

    # Greedy CTC decoding: take the highest-scoring token at each step.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
# Function to analyze topics from summary
def analyze_topics(summary):
    """Classify a summary into a coarse topic by case-insensitive keyword match.

    "customer service" is checked before "retention", so a summary that
    mentions both is labelled "Customer Service".

    Args:
        summary: Summary text to inspect; may be empty or ``None``.

    Returns:
        "Customer Service", "Retention", or "Unknown" when neither keyword
        is present (or when there is no text at all).
    """
    if not summary:
        return "Unknown"

    # Lowercase once instead of once per keyword check.
    text = summary.lower()
    if "customer service" in text:
        return "Customer Service"
    if "retention" in text:
        return "Retention"
    return "Unknown"
# Function to transcribe, summarize, and analyze topics
def transcribe_summarize_analyze(audio_file):
    """Run the full pipeline: transcribe audio, summarize it, detect the topic.

    Args:
        audio_file: Path to the uploaded audio file, or ``None`` when the
            form was submitted without a file.

    Returns:
        A ``(transcription, summary, topic)`` tuple of strings. When the
        transcription is blank, the summary is ``""`` and the topic is
        whatever ``analyze_topics`` yields for an empty summary.

    Raises:
        gr.Error: If no audio file was provided.
        ValueError: Propagated from ``transcribe_audio`` for audio that is
            not sampled at 16 kHz.
    """
    # Fail with a user-visible message instead of crashing inside the reader.
    if audio_file is None:
        raise gr.Error("Please upload an audio file.")

    transcription = transcribe_audio(audio_file)

    # Nothing was recognized: there is no text to summarize.
    if not transcription.strip():
        return transcription, "", analyze_topics("")

    # truncation=True clips over-long transcripts to the model's input
    # limit rather than letting the pipeline fail on them.
    summary = summarizer(
        transcription,
        max_length=100,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    topic = analyze_topics(summary)
    return transcription, summary, topic
# --- Gradio UI ---
# Single audio upload, handed to the callback as a file path.
inputs = gr.Audio(type="filepath", label="Upload your audio file")

# One textbox per value returned by the callback, in the same order.
outputs = [
    gr.Textbox(label=caption)
    for caption in ("Transcription", "Summary", "Topic")
]

_APP_DESCRIPTION = (
    "Upload an audio file in Moroccan Darija to get its transcription, a summarized version, "
    "and the detected topic (Customer Service or Retention)."
)

app = gr.Interface(
    fn=transcribe_summarize_analyze,
    inputs=inputs,
    outputs=outputs,
    title="Moroccan Darija Audio Analysis",
    description=_APP_DESCRIPTION,
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()
|