asahi417 committed on
Commit
3d437ce
·
verified ·
1 Parent(s): c84c901

Update benchmark_hf_pipeline.py

Browse files
Files changed (1) hide show
  1. benchmark_hf_pipeline.py +25 -0
benchmark_hf_pipeline.py CHANGED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Benchmark per-sample transcription latency of a Hugging Face ASR pipeline.

Builds an ``automatic-speech-recognition`` pipeline for ``model_id``,
transcribes every entry of the evaluation dataset's ``audio`` column one at a
time, and pretty-prints the elapsed seconds for each entry keyed by its
``path``.
"""
from pprint import pprint
from time import perf_counter

from datasets import load_dataset
from transformers import pipeline

# config
model_id = "kotoba-tech/kotoba-whisper-v1.0"
generate_kwargs = {"language": "japanese", "task": "transcribe"}

# load model
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    chunk_length_s=15,
    batch_size=64,
)

# load evaluation audio; each sample is transcribed and timed separately
dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")
elapsed = {}
for x in dataset["audio"]:
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals, so the result is not skewed by system clock adjustments.
    start = perf_counter()
    # A copy is handed to the pipeline so `x` is left untouched for the
    # `x["path"]` lookup below.
    # NOTE(review): presumably the pipeline consumes keys of its input dict — confirm.
    transcription = pipe(x.copy(), generate_kwargs=generate_kwargs)
    elapsed[x["path"]] = perf_counter() - start
pprint(elapsed)