Realtime-whisper-large-v3-turbo

Running on Zero

KingNish committed on Oct 1, 2024

Commit

8c4d38d

verified ·

1 Parent(s): 72be79d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import tempfile
 import os
 import uuid
 import scipy.io.wavfile
 MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
 BATCH_SIZE = 8
@@ -21,20 +22,16 @@ pipe = pipeline(
 @spaces.GPU
 def transcribe(inputs, previous_transcription):
     try:
-        # Generate a unique filename using UUID
-        filename = f"{uuid.uuid4().hex}.wav"
-        filepath = os.path.join(tempfile.gettempdir(), filename)
-        # Extract sample rate and audio data from the tuple
         sample_rate, audio_data = inputs
-        # Save the audio data to the temporary file
-        scipy.io.wavfile.write(filepath, sample_rate, audio_data)
-        previous_transcription += pipe(filepath, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
-        # Remove the temporary file after transcription
-        os.remove(filepath)
         return previous_transcription
     except Exception as e:

 import os
 import uuid
 import scipy.io.wavfile
+import numpy as np
 MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 @spaces.GPU
 def transcribe(inputs, previous_transcription):
     try:
         sample_rate, audio_data = inputs
+        # Convert audio data to a NumPy array
+        audio_data = np.frombuffer(audio_data, dtype=np.int16)
+        previous_transcription += pipe(audio_data,
+                                       batch_size=BATCH_SIZE,
+                                       generate_kwargs={"task": "transcribe"},
+                                       return_timestamps=True,
+                                       sampling_rate=sample_rate)["text"]
         return previous_transcription
     except Exception as e: