File size: 2,697 Bytes
391606f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b998e8c
 
 
 
 
 
 
 
 
 
 
c92fb39
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Setup: fetch whisper.cpp, the evaluation audio, and the full-precision
# weights. Everything after the `cd` runs inside the whisper.cpp checkout.
# clone whisper.cpp
git clone https://github.com/ggerganov/whisper.cpp.git
# Abort if the checkout is missing; otherwise every later command would run
# (and write files) in the wrong directory.
cd whisper.cpp || exit 1
# clone dataset
git clone https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval
# convert each clip to 16 kHz, mono, signed 16-bit PCM WAV
for clip in long_interview_1 manzai1 manzai2 manzai3; do
  ffmpeg -i "kotoba-whisper-eval/audio/${clip}.mp3" -ar 16000 -ac 1 -c:a pcm_s16le "kotoba-whisper-eval/audio/${clip}.wav"
done
# download weight into ./models; without it the benchmark cannot run
wget https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0-ggml/resolve/main/ggml-kotoba-whisper-v1.0.bin -P ./models || exit 1
# benchmark main model
# Build once, before any measurement, so compilation time is not counted in
# the first timing. Stop if the build fails: without ./main every number
# recorded below would be meaningless.
make -j || exit 1
main_model=models/ggml-kotoba-whisper-v1.0.bin
audio_dir=kotoba-whisper-eval/audio
# SECONDS is the shell's elapsed-seconds counter (bash): reset it right before
# each transcription and read it right after to get that run's wall-clock time.
SECONDS=0
./main -m "$main_model" -f "$audio_dir/long_interview_1.wav"
TIME_INTERVIEW=$SECONDS
SECONDS=0
./main -m "$main_model" -f "$audio_dir/manzai1.wav"
TIME_MANZAI1=$SECONDS
SECONDS=0
./main -m "$main_model" -f "$audio_dir/manzai2.wav"
TIME_MANZAI2=$SECONDS
SECONDS=0
./main -m "$main_model" -f "$audio_dir/manzai3.wav"
TIME_MANZAI3=$SECONDS
# download weight (quantized, q5_0) into ./models; abort if it is unavailable
# because the timings below would otherwise measure an immediate failure
wget https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0-ggml/resolve/main/ggml-kotoba-whisper-v1.0-q5_0.bin -P ./models || exit 1
# benchmark quantized model
# Make sure the binary is built before timing starts, so the measured
# intervals cover only the transcription runs.
make -j || exit 1
quantized_model=models/ggml-kotoba-whisper-v1.0-q5_0.bin
audio_dir=kotoba-whisper-eval/audio
# SECONDS is the shell's elapsed-seconds counter (bash): reset it right before
# each transcription and read it right after to get that run's wall-clock time.
SECONDS=0
./main -m "$quantized_model" -f "$audio_dir/long_interview_1.wav"
TIME_INTERVIEW_Q=$SECONDS
SECONDS=0
./main -m "$quantized_model" -f "$audio_dir/manzai1.wav"
TIME_MANZAI1_Q=$SECONDS
SECONDS=0
./main -m "$quantized_model" -f "$audio_dir/manzai2.wav"
TIME_MANZAI2_Q=$SECONDS
SECONDS=0
./main -m "$quantized_model" -f "$audio_dir/manzai3.wav"
TIME_MANZAI3_Q=$SECONDS
# summary: print the recorded elapsed seconds per clip, first for the main
# model and then for the quantized one. printf reuses the format string for
# each (label, value) pair; %-9s pads the label to the column width.
printf '%s\n' "MAIN MODEL"
printf '%-9s: %s\n' \
  interview "$TIME_INTERVIEW" \
  manzai1 "$TIME_MANZAI1" \
  manzai2 "$TIME_MANZAI2" \
  manzai3 "$TIME_MANZAI3"
printf '%s\n' "QUANTIZED MODEL"
printf '%-9s: %s\n' \
  interview "$TIME_INTERVIEW_Q" \
  manzai1 "$TIME_MANZAI1_Q" \
  manzai2 "$TIME_MANZAI2_Q" \
  manzai3 "$TIME_MANZAI3_Q"

# Results on a MacBook Pro (elapsed time in seconds):
# - Apple M2 Pro
# - 32GB
# - 14-inch, 2023
# - OS Sonoma Version 14.4.1 (23E224)
# MAIN MODEL
# interview: 581
# manzai1  : 41
# manzai2  : 30
# manzai3  : 35
# QUANTIZED MODEL
# interview: 677
# manzai1  : 37
# manzai2  : 36
# manzai3  : 42