JacobLinCool committed on
Commit
f4c725a
·
1 Parent(s): 108107c

feat: update model

Browse files
Files changed (3) hide show
  1. app.py +41 -7
  2. model.py +25 -0
  3. requirements.txt +2 -0
app.py CHANGED
@@ -1,13 +1,40 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
3
 
4
- model_id = "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2"
5
- client = InferenceClient(model_id)
 
 
 
 
6
 
7
 
8
  def transcribe_audio(audio: str) -> str:
9
- out = client.automatic_speech_recognition(audio)
10
- return out.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  with gr.Blocks() as demo:
@@ -23,7 +50,14 @@ with gr.Blocks() as demo:
23
  )
24
  text_output = gr.Textbox(label="Transcription")
25
 
26
- transcribe_button = gr.Button("Transcribe with Inference API")
 
 
 
 
 
 
 
27
  transcribe_button.click(
28
  fn=transcribe_audio, inputs=[audio_input], outputs=[text_output]
29
  )
@@ -35,7 +69,7 @@ with gr.Blocks() as demo:
35
  ],
36
  inputs=[audio_input],
37
  outputs=[text_output],
38
- fn=transcribe_audio,
39
  cache_examples=True,
40
  cache_mode="lazy",
41
  run_on_click=True,
 
import gradio as gr
from huggingface_hub.utils import get_token
import requests
import base64
from model import model_id, transcribe_audio_local

# Hugging Face token resolved from the local environment/cache by
# huggingface_hub; used to authenticate Inference API requests below.
# NOTE(review): may be None if no token is configured — requests would
# then be sent unauthenticated; confirm the Space always has a token.
token = get_token()
def read_file_as_base64(file_path: str) -> str:
    """Return the contents of *file_path* as a base64-encoded ASCII string."""
    with open(file_path, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode()
13
 
14
 
15
def transcribe_audio(audio: str) -> str:
    """Transcribe a local audio file via the Hugging Face Inference API.

    Args:
        audio: Path to a local audio file (as supplied by the Gradio
            audio component).

    Returns:
        The transcribed text from the API response.

    Raises:
        requests.HTTPError: If the Inference API returns an error status.
        requests.Timeout: If the request exceeds the timeout.
    """
    print(f"{audio=}")

    b64 = read_file_as_base64(audio)
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        # Block until the model is warm instead of getting a 503 back.
        "x-wait-for-model": "true",
    }
    data = {
        "inputs": b64,
        "parameters": {
            "generate_kwargs": {
                "num_beams": 5,
            }
        },
    }
    # A timeout prevents the UI from hanging forever on a stuck request;
    # raise_for_status surfaces API errors explicitly instead of failing
    # later with an opaque KeyError on the missing "text" field.
    response = requests.post(url, headers=headers, json=data, timeout=120)
    response.raise_for_status()
    out = response.json()
    print(f"{out=}")

    return out["text"]
38
 
39
 
40
  with gr.Blocks() as demo:
 
50
  )
51
  text_output = gr.Textbox(label="Transcription")
52
 
53
+ transcribe_local_button = gr.Button(
54
+ "Transcribe with Transformers", variant="primary"
55
+ )
56
+ transcribe_button = gr.Button("Transcribe with Inference API", variant="secondary")
57
+
58
+ transcribe_local_button.click(
59
+ fn=transcribe_audio_local, inputs=[audio_input], outputs=[text_output]
60
+ )
61
  transcribe_button.click(
62
  fn=transcribe_audio, inputs=[audio_input], outputs=[text_output]
63
  )
 
69
  ],
70
  inputs=[audio_input],
71
  outputs=[text_output],
72
+ fn=transcribe_audio_local,
73
  cache_examples=True,
74
  cache_mode="lazy",
75
  run_on_click=True,
model.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Local Whisper ASR model: lazy pipeline construction and transcription."""

from transformers import pipeline
from accelerate import Accelerator

# Best available device (CUDA/MPS/CPU) as selected by accelerate.
device = Accelerator().device

# Fine-tuned Whisper large-v3-turbo checkpoint for Taiwanese Mandarin
# (Common Voice 19.0 zh-TW).
model_id = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"

# Lazily-initialized ASR pipeline; populated by load_model() on first use.
pipe = None
11
def load_model():
    """Build the ASR pipeline and store it in the module-level ``pipe``."""
    global pipe
    asr = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        device=device,
    )
    pipe = asr
14
+
15
+
16
def transcribe_audio_local(audio: str) -> str:
    """Run speech recognition on a local audio file with transformers.

    The pipeline is created lazily on the first call and reused afterwards.

    Args:
        audio: Path to the audio file to transcribe.

    Returns:
        The recognized text.
    """
    print(f"{audio=}")

    # Lazy initialization: build the pipeline only when first needed.
    recognizer = pipe
    if recognizer is None:
        load_model()
        recognizer = pipe

    out = recognizer(audio)
    print(f"{out=}")

    return out["text"]
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio==5.4.0
2
  huggingface_hub==0.26.2
 
 
 
1
  gradio==5.4.0
2
  huggingface_hub==0.26.2
3
+ transformers
4
+ accelerate