kotoba-tech
/

kotoba-whisper-v1.0-ggml

Automatic Speech Recognition

Model card · Files and versions · Community

asahi417 committed on May 8, 2024

Commit

f1a4af3

·

verified ·

1 Parent(s): 3d437ce

Update benchmark_hf_pipeline.py

Files changed (1) hide show

benchmark_hf_pipeline.py +9 -7

benchmark_hf_pipeline.py CHANGED Viewed

@@ -13,13 +13,15 @@ pipe = pipeline(
     chunk_length_s=15,
     batch_size=64
 )
-# load sample audio (concatenate instances to create a long audio)
-dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")
-x = dataset['audio'][0]
 elapsed = {}
-for x in dataset['audio']:
     start = time()
-    transcription = pipe(x.copy(), generate_kwargs=generate_kwargs)
-    elapsed[x['path']] = time() - start
 pprint(elapsed)

     chunk_length_s=15,
     batch_size=64
 )
+test_audio = [
+    "kotoba-whisper-eval/audio/long_interview_1.wav",
+    "kotoba-whisper-eval/audio/manzai1.wav",
+    "kotoba-whisper-eval/audio/manzai2.wav",
+    "kotoba-whisper-eval/audio/manzai3.wav"
+]
 elapsed = {}
+for x in test_audio:
     start = time()
+    transcription = pipe(x, generate_kwargs=generate_kwargs)
+    elapsed[x] = time() - start
 pprint(elapsed)