File size: 2,037 Bytes
9ba153c
 
fb1bb66
 
 
ac8d0e5
 
 
9ba153c
18526df
337d2a4
 
18526df
b7ea2f1
 
 
f799767
664ecda
b7ea2f1
 
b992dd8
23e1010
da27dba
68856b5
 
ac8d0e5
1e4e32f
090d19c
195ae3a
ac8d0e5
b7ea2f1
ac8d0e5
 
b7ea2f1
 
 
 
 
 
da27dba
ab046ec
b7ea2f1
ab046ec
 
 
 
b7ea2f1
 
68856b5
b7ea2f1
ab046ec
0a8189c
ab046ec
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# >>>>>>   Adapted/frankensteined from these scripts:   <<<<<<<
#                 for Summary Interface:
# >>>> https://huggingface.co/spaces/khxu/pegasus-text-summarizers/blob/main/app.py
#                      Audio Interface
# >>>> https://huggingface.co/spaces/iSky/Speech-audio-to-text-with-grammar-correction/blob/main/app.py
#                      Grammar
# >>>> https://huggingface.co/deep-learning-analytics/GrammarCorrector/blob/main/README.md


import gradio as gr
from transformers import pipeline
from gradio.mix import Parallel, Series

# >>>>>>>>>>>>>>>>>>>> Danger Below <<<<<<<<<<<<<<<<<<<<<<
# Load Interfaces:
# Load the three hosted models in a fixed order; the names suggest
# speech-to-text, grammar correction, and summarization respectively.
s2t, grammar, sum_it = (
  gr.Interface.load(model_path)
  for model_path in (
    'huggingface/facebook/s2t-medium-librispeech-asr',
    'huggingface/deep-learning-analytics/GrammarCorrector',
    'huggingface/SamuelMiller/lil_sum_sum',
  )
)

# Audio Functions:
def out(audio):
  """Transcribe a recorded audio clip for the two output text boxes.

  Args:
    audio: The value supplied by the audio input (configured in this file
      with type="filepath"), or None when nothing was recorded.

  Returns:
    A 2-tuple with one entry per declared text output. Both entries hold
    whatever ``s2t`` returns for the clip (presumably the transcription
    text); when no audio is supplied both hold the placeholder "no audio".
  """
  # The interface declares two text outputs, so every path must return two
  # values; a bare string here would not line up with the output components.
  if audio is None:
    return "no audio", "no audio"

  transcript = s2t(audio)
  # Grammar correction and summarization are currently disabled, so the raw
  # transcript is shown in both output boxes.
  return transcript, transcript

  #else:
   # return "something is wrong in the function?"

# Construct Interfaces:
# Microphone input handed to `out` as a file path; marked optional.
mic_input = gr.inputs.Audio(
  source="microphone",
  type="filepath",
  label=None,
  optional=True,
)

# Wire `out` to the single audio input and the two text outputs.
iface = gr.Interface(
  fn=out,
  inputs=mic_input,
  outputs=['text', 'text'],
  title="Speech Audio to text (with corrected grammar)",
  description="Let's Hear It!! This app transforms your speech (input) to text with corrected grammar after (output)!",
)

# Launch Interface (queueing enabled, show_error turned on)
iface.launch(enable_queue=True, show_error=True)

  # From Original Code:
# gr.inputs.Audio(source="upload", type="filepath", label=None, optional=True),  
# examples=[["Grammar-Correct-Sample.mp3"], ["Grammar-Wrong-Sample.mp3"],],

#def speech_to_text(inp):
    #pass  # speech recognition model defined here

#gr.Interface(speech_to_text, inputs="mic", outputs=gr.Textbox(label="Predicted text", lines=4))