vumichien committed on
Commit
5eb8f47
·
1 Parent(s): 01d02b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -18,25 +18,29 @@ def process_audio_file(file):
18
  return inputs
19
 
20
 
21
- def transcribe(file):
22
  inputs = process_audio_file(file)
23
  with torch.no_grad():
24
  output_logit = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
25
  pred_ids = torch.argmax(output_logit, dim=-1)
26
- return processor.batch_decode(pred_ids)[0]
 
 
27
 
28
 
29
  description = "A simple interface to transcribe from spoken Japanese to Hiragana."
30
  article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>."
31
- inputs = [gr.inputs.Audio(source="microphone", type='filepath', optional=True)
32
- ]
 
 
33
  examples = [["samples/BASIC5000_0001.wav"],
34
  ["samples/BASIC5000_0005.wav"]
35
  ]
36
  iface = gr.Interface(
37
  fn=transcribe,
38
  inputs=inputs,
39
- outputs="text",
40
  layout="horizontal",
41
  theme="huggingface",
42
  title="Transcribe Japanese audio to Hiragana",
@@ -44,5 +48,6 @@ iface = gr.Interface(
44
  article=article,
45
  allow_flagging='never',
46
  examples=examples,
 
47
  )
48
  iface.launch(enable_queue=True, share=True)
 
18
  return inputs
19
 
20
 
21
+ def transcribe(file, state=""):
22
  inputs = process_audio_file(file)
23
  with torch.no_grad():
24
  output_logit = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
25
  pred_ids = torch.argmax(output_logit, dim=-1)
26
+ text = processor.batch_decode(pred_ids)[0]
27
+ state += text + " "
28
+ return state, state
29
 
30
 
31
  description = "A simple interface to transcribe from spoken Japanese to Hiragana."
32
  article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>."
33
+ inputs = [gr.inputs.Audio(source="microphone", type="filepath", optional=True),
34
+ "state"]
35
+ outputs = ["textbox", "state"]
36
+
37
  examples = [["samples/BASIC5000_0001.wav"],
38
  ["samples/BASIC5000_0005.wav"]
39
  ]
40
  iface = gr.Interface(
41
  fn=transcribe,
42
  inputs=inputs,
43
+ outputs=outputs,
44
  layout="horizontal",
45
  theme="huggingface",
46
  title="Transcribe Japanese audio to Hiragana",
 
48
  article=article,
49
  allow_flagging='never',
50
  examples=examples,
51
+ live=True,
52
  )
53
  iface.launch(enable_queue=True, share=True)