Realtime-whisper-large-v3-turbo

Running on Zero

App Files Files Community

KingNish committed on Oct 4, 2024

Commit

ceea111

verified ·

1 Parent(s): 23a2ead

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -30

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ pipe = pipeline(
     model=model,
     tokenizer=tokenizer,
     feature_extractor=processor.feature_extractor,
-    max_new_tokens=25,
     torch_dtype=torch_dtype,
     device=device,
 )
@@ -43,36 +43,10 @@ def transcribe(inputs, previous_transcription):
     try:
         filename = f"{uuid.uuid4().hex}.wav"
         sample_rate, audio_data = inputs
-        # Check the duration of the audio
-        duration = len(audio_data) / sample_rate  # Duration in seconds
-        if duration > 5:
-            # Split audio into chunks of 5 seconds
-            chunk_size = 5 * sample_rate  # Number of samples for 5 seconds
-            num_chunks = int(np.ceil(len(audio_data) / chunk_size))
-            transcriptions = []
-            for i in range(num_chunks):
-                start_index = i * chunk_size
-                end_index = min(start_index + chunk_size, len(audio_data))
-                chunk_data = audio_data[start_index:end_index]
-                # Write chunk to a temporary file
-                chunk_filename = f"{uuid.uuid4().hex}_chunk.wav"
-                scipy.io.wavfile.write(chunk_filename, sample_rate, chunk_data)
-                # Transcribe the chunk
-                transcription = pipe(chunk_filename)["text"]
-                transcriptions.append(transcription)
-            # Combine all transcriptions
-            previous_transcription += " ".join(transcriptions)
-        else:
-            # Write the original audio file if it's 5 seconds or less
-            scipy.io.wavfile.write(filename, sample_rate, audio_data)
-            transcription = pipe(filename)["text"]
-            previous_transcription += transcription
         end_time = time.time()
         latency = end_time - start_time

     model=model,
     tokenizer=tokenizer,
     feature_extractor=processor.feature_extractor,
+    chunk_length_s=10,
     torch_dtype=torch_dtype,
     device=device,
 )
     try:
         filename = f"{uuid.uuid4().hex}.wav"
         sample_rate, audio_data = inputs
+        scipy.io.wavfile.write(filename, sample_rate, audio_data)
+        transcription = pipe(filename)["text"]
+        previous_transcription += transcription
         end_time = time.time()
         latency = end_time - start_time