File size: 3,325 Bytes
391606f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a7ddcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be7d414
 
391606f
 
 
 
 
 
 
 
 
 
b998e8c
 
 
 
 
 
 
 
 
 
 
c92fb39
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Setup: fetch whisper.cpp, the evaluation audio and the main model weight.
# Requires bash (the benchmarks below rely on $SECONDS), git, ffmpeg and wget.
# Every step aborts the script on failure, because timings taken after a
# broken setup would only measure how quickly ./main fails.

# clone whisper.cpp (skipped when a previous run already cloned it)
if [[ ! -d whisper.cpp ]]; then
  git clone https://github.com/ggerganov/whisper.cpp.git || exit 1
fi
# Stop here if the directory is unusable; otherwise everything below would
# run in (and write into) the caller's current directory.
cd whisper.cpp || exit 1
# clone dataset (skipped when a previous run already cloned it)
if [[ ! -d kotoba-whisper-eval ]]; then
  git clone https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval || exit 1
fi
# convert to 16khz mono 16-bit PCM wav
# -y overwrites outputs left by an earlier run instead of prompting.
for clip in long_interview_1 manzai1 manzai2 manzai3; do
  ffmpeg -y -i "kotoba-whisper-eval/audio/${clip}.mp3" -ar 16000 -ac 1 -c:a pcm_s16le \
    "kotoba-whisper-eval/audio/${clip}.wav" || exit 1
done
# clone weight
wget https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0-ggml/resolve/main/ggml-kotoba-whisper-v1.0.bin -P ./models || exit 1
# benchmark main model
# Build before starting any timer. With `make -j` inside the timed region the
# first measurement also counted the whole whisper.cpp compile, which made it
# incomparable with the later ones. Abort if the build fails, since ./main
# would be missing and every timing meaningless.
make -j || exit 1
# Each TIME_* variable holds the elapsed whole seconds of one transcription:
# assigning 0 to bash's SECONDS restarts its counter.
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0.bin -f kotoba-whisper-eval/audio/long_interview_1.wav
TIME_INTERVIEW=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0.bin -f kotoba-whisper-eval/audio/manzai1.wav
TIME_MANZAI1=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0.bin -f kotoba-whisper-eval/audio/manzai2.wav
TIME_MANZAI2=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0.bin -f kotoba-whisper-eval/audio/manzai3.wav
TIME_MANZAI3=$SECONDS
# clone weight (quantized)
# Abort on a failed download: without the model file ./main exits at once and
# the recorded times would be bogus.
wget https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0-ggml/resolve/main/ggml-kotoba-whisper-v1.0-q5_0.bin -P ./models || exit 1
# benchmark quantized model
# Bring the build up to date once, outside the timed regions, so that no
# measurement includes make's dependency check or a rebuild.
make -j || exit 1
# Each TIME_*_Q variable holds the elapsed whole seconds of one transcription:
# assigning 0 to bash's SECONDS restarts its counter.
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0-q5_0.bin -f kotoba-whisper-eval/audio/long_interview_1.wav
TIME_INTERVIEW_Q=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0-q5_0.bin -f kotoba-whisper-eval/audio/manzai1.wav
TIME_MANZAI1_Q=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0-q5_0.bin -f kotoba-whisper-eval/audio/manzai2.wav
TIME_MANZAI2_Q=$SECONDS
SECONDS=0
./main -m models/ggml-kotoba-whisper-v1.0-q5_0.bin -f kotoba-whisper-eval/audio/manzai3.wav
TIME_MANZAI3_Q=$SECONDS

# # clone the weight
# bash ./models/download-ggml-model.sh large-v3
# # benchmark large-v3
# SECONDS=0
# make -j && ./main -m models/ggml-large-v3.bin -f kotoba-whisper-eval/audio/long_interview_1.wav
# TIME_INTERVIEW_L=$SECONDS
# SECONDS=0
# make -j && ./main -m models/ggml-large-v3.bin -f kotoba-whisper-eval/audio/manzai1.wav
# TIME_MANZAI1_L=$SECONDS
# SECONDS=0
# make -j && ./main -m models/ggml-large-v3.bin -f kotoba-whisper-eval/audio/manzai2.wav
# TIME_MANZAI2_L=$SECONDS
# SECONDS=0
# make -j && ./main -m models/ggml-large-v3.bin --language ja -f kotoba-whisper-eval/audio/manzai3.wav
# TIME_MANZAI3_L=$SECONDS


# summary: one line per audio clip with its recorded elapsed seconds,
# grouped under a heading for each model variant
printf '%s\n' \
  "MAIN MODEL" \
  "interview: $TIME_INTERVIEW" \
  "manzai1  : $TIME_MANZAI1" \
  "manzai2  : $TIME_MANZAI2" \
  "manzai3  : $TIME_MANZAI3"
printf '%s\n' \
  "QUANTIZED MODEL" \
  "interview: $TIME_INTERVIEW_Q" \
  "manzai1  : $TIME_MANZAI1_Q" \
  "manzai2  : $TIME_MANZAI2_Q" \
  "manzai3  : $TIME_MANZAI3_Q"

# Result on MacBookPro (values are elapsed seconds, taken from bash's $SECONDS):
# - Apple M2 Pro
# - 32GB
# - 14-inch, 2023
# - OS Sonoma Version 14.4.1 (23E224)
# MAIN MODEL
# interview: 581
# manzai1  : 41
# manzai2  : 30
# manzai3  : 35
# QUANTIZED MODEL
# interview: 677
# manzai1  : 37
# manzai2  : 36
# manzai3  : 42