wang0507 committed on
Commit
4dfea18
·
1 Parent(s): 3cc9351

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -28
app.py CHANGED
@@ -1,21 +1,14 @@
 
1
  import os
2
  os.system("pip install git+https://github.com/openai/whisper.git")
3
  import gradio as gr
4
  import whisper
5
- import numpy as np
 
6
 
7
  model = whisper.load_model("base")
8
 
9
- def transcribe(stream, new_chunk):
10
- sr, y = new_chunk
11
- y = y.astype(np.float32)
12
- y /= np.max(np.abs(y))
13
 
14
- if stream is not None:
15
- stream = np.concatenate([stream, y])
16
- else:
17
- stream = y
18
- return stream, whisper.transcribe(stream, model=model, sampling_rate=sr)["text"]
19
 
20
  def inference(audio):
21
  audio = whisper.load_audio(audio)
@@ -25,26 +18,17 @@ def inference(audio):
25
 
26
  _, probs = model.detect_language(mel)
27
 
28
- options = whisper.DecodingOptions(fp16=False)
29
  result = whisper.decode(model, mel, options)
30
 
31
  return result.text
32
 
33
- with gr.Blocks() as demo:
34
- gr.Markdown("Flip text or image files using this demo.")
35
- with gr.Tab("語音轉文字"):
36
- with gr.Row():
37
- inputs = gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。")
38
- outputs = "text"
39
- fn = inference
40
- iface = gr.Interface(fn=fn, inputs=inputs, outputs=outputs)
41
- ["state", iface]
42
- with gr.Tab("Real Time Speech Recognition"):
43
- with gr.Row():
44
- transcribe
45
- ["state", gr.Audio(sources=["microphone"], streaming=True)],
46
- ["state", "text"],
47
- live=True
48
-
49
- demo.launch()
50
 
 
1
+
2
  import os
3
  os.system("pip install git+https://github.com/openai/whisper.git")
4
  import gradio as gr
5
  import whisper
6
+
7
+
8
 
9
  model = whisper.load_model("base")
10
 
 
 
 
 
11
 
 
 
 
 
 
12
 
13
  def inference(audio):
14
  audio = whisper.load_audio(audio)
 
18
 
19
  _, probs = model.detect_language(mel)
20
 
21
+ options = whisper.DecodingOptions(fp16 = False)
22
  result = whisper.decode(model, mel, options)
23
 
24
  return result.text
25
 
26
+ iface = gr.Interface(
27
+ fn=inference,
28
+ inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
29
+ outputs="text"
30
+ )
31
+
32
+ # 启动 Gradio 界面
33
+ iface.launch()
 
 
 
 
 
 
 
 
 
34