abidlabs (HF Staff) committed
Commit b709d83 · 1 Parent(s): 74dcb37

Update app.py

Files changed (1):
  1. app.py +64 -31
app.py CHANGED
@@ -1,20 +1,63 @@
  import gradio as gr
- import time
+ from transformers import pipeline, Wav2Vec2ProcessorWithLM
+ from pyannote.audio import Pipeline
+ from librosa import load, resample
+ from rpunct import RestorePuncts

- EXAMPLES = ['test.wav']
+ # Audio components
+ asr_model = 'patrickvonplaten/wav2vec2-base-960h-4-gram'
+ processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)
+ asr = pipeline('automatic-speech-recognition', model=asr_model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, decoder=processor.decoder)
+ speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
+ rpunct = RestorePuncts()
+
+ # Text components
+ sentiment_pipeline = pipeline('text-classification', model="distilbert-base-uncased-finetuned-sst-2-english")
+ sentiment_threshold = 0.75
+
+ EXAMPLES = ["example_audio.wav"]

  def speech_to_text(speech):
-     time.sleep(1)
-     return [
-         ("So I've prepared a presentation I'm sharing it with All you should be ", "Speaker 1"),
-         ("able to seat on your screen right now. Got it?", "Speaker 1"),
-         ("from 0.258-6.249", None),
-         ("I don't see a link anywhere says it Headed down low a plug and.", "Speaker 2"),
-         ("from 6.384-9.573", None)], """so i've prepared a presentation \n i'm sharing it with all you should be able to seat on your screen right now got it i don't see a link anywhere says it headed down low a plug and"""
-
- def sentiment(checked_options):
-     time.sleep(0.3)
-     return {"happy": 0.5, "confused": 0.3, "sad": 0.2}
+     speaker_output = speaker_segmentation(speech)
+     speech, sampling_rate = load(speech)
+     if sampling_rate != 16000:
+         speech = resample(speech, sampling_rate, 16000)
+     text = asr(speech, return_timestamps="word")
+
+     full_text = text['text'].lower()
+     chunks = text['chunks']
+
+     diarized_output = []
+     i = 0
+     speaker_counter = 0
+
+     # New iteration every time the speaker changes
+     for turn, _, _ in speaker_output.itertracks(yield_label=True):
+         speaker = "Speaker 0" if speaker_counter % 2 == 0 else "Speaker 1"
+         diarized = ""
+         while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
+             diarized += chunks[i]['text'].lower() + ' '
+             i += 1
+
+         if diarized != "":
+             diarized = rpunct.punctuate(diarized)
+             diarized_output.extend([(diarized, speaker), ('from {:.2f}-{:.2f}'.format(turn.start, turn.end), None)])
+         speaker_counter += 1
+     return diarized_output, full_text
+
+ def sentiment(checked_options, diarized):
+     customer_id = checked_options
+     customer_sentiments = []
+
+     for transcript in diarized:
+         speaker_speech, speaker_id = transcript
+         if speaker_id == customer_id:
+             output = sentiment_pipeline(speaker_speech)[0]
+             if output["label"] != "neutral" and output["score"] > sentiment_threshold:
+                 customer_sentiments.append((speaker_speech, output["label"]))
+             else:
+                 customer_sentiments.append((speaker_speech, None))
+     return customer_sentiments

  demo = gr.Blocks()
  demo.encrypt = False
@@ -22,35 +65,25 @@ demo.encrypt = False
  with demo:
      with gr.Row():
          with gr.Column():
-             audio = gr.Audio(label="Audio file")
+             audio = gr.Audio(label="Audio file", type='filepath')
              with gr.Row():
                  btn = gr.Button("Transcribe")
-
              with gr.Row():
-                 examples = gr.components.Dataset(
-                     components=[audio],
-                     samples=[EXAMPLES],
-                     type="index",
-                 )
-
+                 examples = gr.components.Dataset(components=[audio], samples=[EXAMPLES], type="index")
          with gr.Column():
              gr.Markdown("**Diarized Output:**")
              diarized = gr.HighlightedText(lines=5, label="Diarized Output")
              full = gr.Textbox(lines=4, label="Full Transcript")
-             check = gr.CheckboxGroup(["Speaker 1", "Speaker 2"], label="Choose speaker(s) for sentiment analysis")
-             label = gr.Label()
+             check = gr.Radio(["Speaker 0", "Speaker 1"], label='Choose speaker for sentiment analysis')
+             analyzed = gr.HighlightedText(label="Customer Sentiment")

      btn.click(speech_to_text, audio, [diarized, full], status_tracker=gr.StatusTracker(cover_container=True))
-     check.change(sentiment, check, label, status_tracker=gr.StatusTracker(cover_container=True))
-
+     check.change(sentiment, [check, diarized], analyzed, status_tracker=gr.StatusTracker(cover_container=True))
+
      def load_example(example_id):
          processed_examples = audio.preprocess_example(EXAMPLES[example_id])
-         print(processed_examples)
          return processed_examples
-
-     examples._click_no_postprocess(
-         load_example,
-         inputs=[examples],
-         outputs=[audio])
+
+     examples._click_no_postprocess(load_example, inputs=[examples], outputs=[audio])

      demo.launch()
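
The heart of the new speech_to_text is the alignment loop: word-level ASR chunks are consumed in timestamp order, and every word whose end time falls within the current pyannote turn is attributed to that turn's speaker. Below is a minimal sketch of that grouping logic with hypothetical toy data standing in for the real model outputs (no models needed; the Turn namedtuple is a stand-in for pyannote's segment objects):

    # Minimal sketch of the chunk-to-turn alignment used in speech_to_text.
    # All data here is hypothetical toy data, not real ASR/pyannote output.
    from collections import namedtuple

    Turn = namedtuple("Turn", ["start", "end"])  # stand-in for a pyannote segment

    # Word-level chunks in the shape returned by asr(..., return_timestamps="word")
    chunks = [
        {"text": "HELLO", "timestamp": (0.0, 0.4)},
        {"text": "THERE", "timestamp": (0.5, 0.9)},
        {"text": "HI", "timestamp": (1.2, 1.4)},
    ]
    turns = [Turn(0.0, 1.0), Turn(1.0, 2.0)]  # hypothetical speaker turns

    diarized_output = []
    i = 0
    for speaker_counter, turn in enumerate(turns):
        speaker = "Speaker 0" if speaker_counter % 2 == 0 else "Speaker 1"
        words = []
        # Consume every word whose end timestamp falls inside this turn
        while i < len(chunks) and chunks[i]["timestamp"][1] <= turn.end:
            words.append(chunks[i]["text"].lower())
            i += 1
        if words:
            diarized_output.append((" ".join(words), speaker))

    print(diarized_output)
    # [('hello there', 'Speaker 0'), ('hi', 'Speaker 1')]

Both speech_to_text and sentiment return lists of (text, label) pairs because that is the format gr.HighlightedText renders; a None label is displayed as unhighlighted text, which is how the "from start-end" timestamp spans and the below-threshold sentences stay uncolored.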