JabriA committed on
Commit
3785854
·
1 Parent(s): 61e1f2a

Add Darija transcription and topic extraction app3

Browse files
Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -23,39 +23,44 @@ def transcribe_audio(audio_path):
23
  transcription = processor.batch_decode(predicted_ids)[0]
24
  return transcription
25
 
26
- # Function to filter text by keywords
27
- def filter_text_by_keywords(text, keywords):
28
- keyword_list = keywords.split(",")
29
- filtered_sentences = [
30
- sentence for sentence in text.split(". ") if any(keyword.strip().lower() in sentence.lower() for keyword in keyword_list)
31
- ]
32
- return ". ".join(filtered_sentences) if filtered_sentences else text
 
33
 
34
- # Function to transcribe and summarize
35
- def transcribe_and_summarize(audio_file, keywords):
 
36
  transcription = transcribe_audio(audio_file)
37
- filtered_text = filter_text_by_keywords(transcription, keywords)
38
- summary = summarizer(filtered_text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
39
- return transcription, summary
 
 
 
 
 
40
 
41
  # Gradio Interface
42
- inputs = [
43
- gr.Audio(type="filepath", label="Upload your audio file"),
44
- gr.Textbox(label="Enter Keywords (comma-separated)", placeholder="e.g., customer, service, retention")
45
- ]
46
  outputs = [
47
  gr.Textbox(label="Transcription"),
48
- gr.Textbox(label="Summary")
 
49
  ]
50
 
51
  app = gr.Interface(
52
- fn=transcribe_and_summarize,
53
  inputs=inputs,
54
  outputs=outputs,
55
- title="Moroccan Darija Audio Transcription and Summarization",
56
  description=(
57
- "Upload an audio file in Moroccan Darija to get its transcription and a summarized version. "
58
- "Specify relevant keywords (comma-separated) to filter the transcription before summarization."
59
  )
60
  )
61
 
 
23
  transcription = processor.batch_decode(predicted_ids)[0]
24
  return transcription
25
 
26
+ # Function to analyze topics from summary
27
+ def analyze_topics(summary):
28
+ if "customer service" in summary.lower():
29
+ return "Customer Service"
30
+ elif "retention" in summary.lower():
31
+ return "Retention"
32
+ else:
33
+ return "Unknown"
34
 
35
+ # Function to transcribe, summarize, and analyze topics
36
+ def transcribe_summarize_analyze(audio_file):
37
+ # Transcription
38
  transcription = transcribe_audio(audio_file)
39
+
40
+ # Summarization
41
+ summary = summarizer(transcription, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
42
+
43
+ # Topic Analysis
44
+ topic = analyze_topics(summary)
45
+
46
+ return transcription, summary, topic
47
 
48
  # Gradio Interface
49
+ inputs = gr.Audio(type="filepath", label="Upload your audio file")
 
 
 
50
  outputs = [
51
  gr.Textbox(label="Transcription"),
52
+ gr.Textbox(label="Summary"),
53
+ gr.Textbox(label="Topic")
54
  ]
55
 
56
  app = gr.Interface(
57
+ fn=transcribe_summarize_analyze,
58
  inputs=inputs,
59
  outputs=outputs,
60
+ title="Moroccan Darija Audio Analysis",
61
  description=(
62
+ "Upload an audio file in Moroccan Darija to get its transcription, a summarized version, "
63
+ "and the detected topic (Customer Service or Retention)."
64
  )
65
  )
66