Add Darija transcription and topic extraction app3
Browse files
app.py
CHANGED
@@ -23,39 +23,44 @@ def transcribe_audio(audio_path):
|
|
23 |
transcription = processor.batch_decode(predicted_ids)[0]
|
24 |
return transcription
|
25 |
|
26 |
-
# Function to
|
27 |
-
def
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
33 |
|
34 |
-
# Function to transcribe and
|
35 |
-
def
|
|
|
36 |
transcription = transcribe_audio(audio_file)
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
# Gradio Interface
|
42 |
-
inputs =
|
43 |
-
gr.Audio(type="filepath", label="Upload your audio file"),
|
44 |
-
gr.Textbox(label="Enter Keywords (comma-separated)", placeholder="e.g., customer, service, retention")
|
45 |
-
]
|
46 |
outputs = [
|
47 |
gr.Textbox(label="Transcription"),
|
48 |
-
gr.Textbox(label="Summary")
|
|
|
49 |
]
|
50 |
|
51 |
app = gr.Interface(
|
52 |
-
fn=
|
53 |
inputs=inputs,
|
54 |
outputs=outputs,
|
55 |
-
title="Moroccan Darija Audio
|
56 |
description=(
|
57 |
-
"Upload an audio file in Moroccan Darija to get its transcription
|
58 |
-
"
|
59 |
)
|
60 |
)
|
61 |
|
|
|
23 |
transcription = processor.batch_decode(predicted_ids)[0]
|
24 |
return transcription
|
25 |
|
26 |
+
# Function to analyze topics from summary
def analyze_topics(summary):
    """Return a coarse topic label for a summary using keyword matching.

    Args:
        summary: Summary text to inspect. ``None`` or an empty string is
            treated as containing no recognizable topic.

    Returns:
        ``"Customer Service"`` if the text contains "customer service",
        otherwise ``"Retention"`` if it contains "retention", otherwise
        ``"Unknown"``. Matching is case-insensitive, and "customer service"
        takes precedence when both keywords are present.
    """
    # Guard against a missing summary so callers get the fallback label
    # instead of an AttributeError on ``None.lower()``.
    if not summary:
        return "Unknown"

    # Lowercase once rather than once per branch.
    text = summary.lower()
    if "customer service" in text:
        return "Customer Service"
    if "retention" in text:
        return "Retention"
    return "Unknown"
|
34 |
|
35 |
+
# Function to transcribe, summarize, and analyze topics
def transcribe_summarize_analyze(audio_file):
    """Transcribe an audio file, summarize the transcript, and label its topic.

    Args:
        audio_file: Path to the audio file; passed unchanged to
            ``transcribe_audio``. May be ``None`` or empty, e.g. when the
            form is submitted without an upload.

    Returns:
        A ``(transcription, summary, topic)`` tuple of strings. When there
        is no audio, or the transcript is empty, the summary is ``""`` and
        the topic is ``"Unknown"`` (the same fallback label that
        ``analyze_topics`` uses).
    """
    # Nothing was submitted: return blank outputs instead of failing inside
    # the transcription step.
    if not audio_file:
        return "", "", "Unknown"

    # Transcription
    transcription = transcribe_audio(audio_file)

    # An empty transcript gives the summarizer nothing to work with (it is
    # asked for at least 30 tokens), so skip summarization entirely.
    if not transcription or not transcription.strip():
        return transcription or "", "", "Unknown"

    # Summarization
    # NOTE(review): very long transcripts may exceed the summarizer's input
    # limit; confirm whether truncation needs to be enabled.
    summary = summarizer(transcription, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]

    # Topic Analysis
    topic = analyze_topics(summary)

    return transcription, summary, topic
|
47 |
|
48 |
# Gradio Interface
# Single audio upload; the component is configured with type="filepath".
inputs = gr.Audio(label="Upload your audio file", type="filepath")

# One text box per value returned by transcribe_summarize_analyze, in the
# same order as its return tuple.
outputs = [
    gr.Textbox(label=caption)
    for caption in ("Transcription", "Summary", "Topic")
]

# Wire the processing function to the input and output components.
app = gr.Interface(
    title="Moroccan Darija Audio Analysis",
    description=(
        "Upload an audio file in Moroccan Darija to get its transcription, a summarized version, "
        "and the detected topic (Customer Service or Retention)."
    ),
    fn=transcribe_summarize_analyze,
    inputs=inputs,
    outputs=outputs,
)
|
66 |
|