KingNish committed on
Commit
ea9f05f
·
verified ·
1 Parent(s): ceea111

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -3
app.py CHANGED
@@ -25,7 +25,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
25
  model.to(device)
26
 
27
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
28
- tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME, language="en")
29
 
30
  pipe = pipeline(
31
  task="automatic-speech-recognition",
@@ -55,6 +55,24 @@ def transcribe(inputs, previous_transcription):
55
  print(f"Error during Transcription: {e}")
56
  return previous_transcription, "Error"
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  def clear():
60
  return ""
@@ -72,7 +90,7 @@ with gr.Blocks() as microphone:
72
  input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output, latency_textbox], time_limit=45, stream_every=2, concurrency_limit=None)
73
  clear_button.click(clear, outputs=[output])
74
 
75
- with gr.Blocks() as flie:
76
  with gr.Column():
77
  gr.Markdown(f"# Realtime Whisper Large V3 Turbo: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
78
  with gr.Row():
@@ -86,7 +104,20 @@ with gr.Blocks() as flie:
86
  submit_button.click(transcribe, [input_audio_microphone, output], [output, latency_textbox], concurrency_limit=None)
87
  clear_button.click(clear, outputs=[output])
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  with gr.Blocks() as demo:
90
- gr.TabbedInterface([microphone, flie], ["Microphone", "Audio file"])
91
 
92
  demo.launch()
 
25
  model.to(device)
26
 
27
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
28
+ tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
29
 
30
  pipe = pipeline(
31
  task="automatic-speech-recognition",
 
55
  print(f"Error during Transcription: {e}")
56
  return previous_transcription, "Error"
57
 
58
@spaces.GPU
def translate_and_transcribe(inputs, previous_transcription):
    """Translate one streamed audio chunk and append it to the running text.

    The chunk is written to a temporary WAV file, run through the module-level
    ``pipe`` with Whisper's ``translate`` task, and the resulting text is
    appended to ``previous_transcription``.

    Args:
        inputs: A ``(sample_rate, audio_data)`` pair; both values are handed
            straight to ``scipy.io.wavfile.write``.
        previous_transcription: Text accumulated by earlier calls.

    Returns:
        A ``(text, latency)`` tuple. On success ``text`` is the accumulated
        transcription including this chunk and ``latency`` is the elapsed
        wall-clock time in seconds formatted with two decimals. On any
        failure the unchanged ``previous_transcription`` and the literal
        string ``"Error"`` are returned.
    """
    # Local import: the file's import block is not guaranteed to provide `os`.
    import os

    start_time = time.time()
    # Unique name per call so concurrent streams never overwrite each other.
    filename = f"{uuid.uuid4().hex}.wav"
    try:
        sample_rate, audio_data = inputs
        scipy.io.wavfile.write(filename, sample_rate, audio_data)

        # `language` has to travel inside generate_kwargs: the ASR pipeline
        # call does not accept it as a direct keyword, so passing it there
        # raised TypeError and every request ended in the "Error" branch.
        # NOTE(review): "<|es|>" presumably marks the *spoken* language as
        # Spanish (Whisper's translate task emits English) - confirm intent.
        translation = pipe(
            filename,
            generate_kwargs={"task": "translate", "language": "<|es|>"},
        )["text"]

        previous_transcription += translation

        latency = time.time() - start_time
        return previous_transcription, f"{latency:.2f}"
    except Exception as e:
        # Best-effort streaming handler: report the failure in the UI instead
        # of crashing the stream.
        print(f"Error during Translation and Transcription: {e}")
        return previous_transcription, "Error"
    finally:
        # The handler fires every couple of seconds while streaming; without
        # cleanup each call would leave a WAV file behind on disk.
        if os.path.exists(filename):
            os.remove(filename)
76
 
77
  def clear():
78
  return ""
 
90
  input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output, latency_textbox], time_limit=45, stream_every=2, concurrency_limit=None)
91
  clear_button.click(clear, outputs=[output])
92
 
93
+ with gr.Blocks() as file:
94
  with gr.Column():
95
  gr.Markdown(f"# Realtime Whisper Large V3 Turbo: \n Transcribe Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
96
  with gr.Row():
 
104
  submit_button.click(transcribe, [input_audio_microphone, output], [output, latency_textbox], concurrency_limit=None)
105
  clear_button.click(clear, outputs=[output])
106
 
107
+ with gr.Blocks() as translate:
108
+ with gr.Column():
109
+ gr.Markdown(f"# Realtime Whisper Large V3 Turbo (Translation): \n Transcribe and Translate Audio in Realtime. This Demo uses the Checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.\n Note: The first token takes about 5 seconds. After that, it works flawlessly.")
110
+ with gr.Row():
111
+ input_audio_microphone = gr.Audio(streaming=True)
112
+ output = gr.Textbox(label="Transcription and Translation", value="")
113
+ latency_textbox = gr.Textbox(label="Latency (seconds)", value="0.0", scale=0)
114
+ with gr.Row():
115
+ clear_button = gr.Button("Clear Output")
116
+
117
+ input_audio_microphone.stream(translate_and_transcribe, [input_audio_microphone, output], [output, latency_textbox], time_limit=45, stream_every=2, concurrency_limit=None)
118
+ clear_button.click(clear, outputs=[output])
119
+
120
  with gr.Blocks() as demo:
121
+ gr.TabbedInterface([microphone, translate, file], ["Microphone", "Realtime Translation", "Transcribe from file"])
122
 
123
  demo.launch()