File size: 2,244 Bytes
b789fdf
6c74174
 
 
b789fdf
cf75eeb
6c74174
8663026
 
6c74174
 
61e1f2a
b789fdf
61e1f2a
6c74174
61e1f2a
 
 
6c74174
 
 
 
 
 
 
3785854
 
 
 
 
 
 
 
61e1f2a
3785854
 
 
6c74174
3785854
 
 
 
 
 
 
 
cf75eeb
ff9c2e5
3785854
ff9c2e5
 
3785854
 
ff9c2e5
cf75eeb
ff9c2e5
3785854
ff9c2e5
 
3785854
61e1f2a
3785854
 
61e1f2a
cf75eeb
 
ff9c2e5
6c74174
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
import soundfile as sf

# ---------------------------------------------------------------------------
# Model loading — runs once at import time; first run downloads weights from
# the Hugging Face Hub, so startup can be slow.
# ---------------------------------------------------------------------------

# Transcription model for Moroccan Darija: wav2vec2 CTC fine-tune.
# The processor handles feature extraction (input) and CTC decoding (output).
processor = Wav2Vec2Processor.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")
transcription_model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")

# Summarization model (BART fine-tuned on CNN/DailyMail).
# NOTE(review): bart-large-cnn is English-centric — summarizing Darija text
# with it may give poor results; confirm against real transcripts.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe an audio file to Moroccan Darija text.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``.

    Returns:
        The decoded transcription string (greedy CTC decode).

    Raises:
        ValueError: If the audio is not sampled at 16 kHz, the rate the
            wav2vec2 model expects.
    """
    audio_input, sample_rate = sf.read(audio_path)
    # sf.read returns shape (frames, channels) for multi-channel files;
    # the wav2vec2 processor expects a 1-D waveform, so average to mono.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)
    if sample_rate != 16000:
        raise ValueError("Audio must be sampled at 16kHz.")
    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only — skip gradient bookkeeping
        logits = transcription_model(**inputs).logits
    # Greedy CTC decoding: pick the most likely token per frame, then let
    # batch_decode collapse repeats and strip blanks.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription

# Function to analyze topics from summary
def analyze_topics(summary):
    """Classify a summary into a known topic by keyword matching.

    "customer service" is tested before "retention", so a summary that
    mentions both is labelled "Customer Service".

    Args:
        summary: Summary text to classify (matched case-insensitively).

    Returns:
        One of "Customer Service", "Retention", or "Unknown".
    """
    # Lowercase once instead of once per keyword test.
    text = summary.lower()
    if "customer service" in text:
        return "Customer Service"
    if "retention" in text:
        return "Retention"
    return "Unknown"

# Function to transcribe, summarize, and analyze topics
def transcribe_summarize_analyze(audio_file):
    """Run the full pipeline on one audio file.

    Args:
        audio_file: Path to the uploaded audio file.

    Returns:
        A (transcription, summary, topic) tuple of strings.
    """
    # Step 1: speech -> Darija text.
    transcription = transcribe_audio(audio_file)

    # Step 2: condense the transcript; take the single summary the
    # pipeline returns for a single input.
    summary_batch = summarizer(transcription, max_length=100, min_length=30, do_sample=False)
    summary = summary_batch[0]["summary_text"]

    # Step 3: keyword-based topic label from the summary.
    topic = analyze_topics(summary)

    return transcription, summary, topic

# Gradio interface wiring: one audio upload in, three text boxes out.
inputs = gr.Audio(type="filepath", label="Upload your audio file")
outputs = [
    gr.Textbox(label="Transcription"),  # raw Darija transcription
    gr.Textbox(label="Summary"),        # BART-generated summary
    gr.Textbox(label="Topic"),          # keyword-derived topic label
]

app = gr.Interface(
    fn=transcribe_summarize_analyze,
    inputs=inputs,
    outputs=outputs,
    title="Moroccan Darija Audio Analysis",
    description=(
        "Upload an audio file in Moroccan Darija to get its transcription, a summarized version, "
        "and the detected topic (Customer Service or Retention)."
    ),
)

# Start the web UI only when executed as a script (not when imported).
if __name__ == "__main__":
    app.launch()