In [None]:
!git clone https://github.com/openai/whisper.git

In [None]:
import io
import sys
import json
import struct
import base64
import torch
import numpy as np
from pathlib import Path

# SET PARAMETER: True: multilingual False: English only
multilingual = True

# Root of the cloned whisper repository, and the directory the output file is
# written to.
dir_whisper = "/content/whisper"
dir_out = "/content/"

# Both assets read below (mel filters and tokenizer vocabulary) live here.
dir_assets = Path(dir_whisper) / "whisper" / "assets"

# load mel filters
# The .npz archive is indexed by "mel_<n_mels>"; the selected array is wrapped
# as a torch tensor.
n_mels = 80
with np.load(dir_assets / "mel_filters.npz") as f:
    filters = torch.from_numpy(f[f"mel_{n_mels}"])

# load tokenizer
# Conditional expression instead of the fragile `cond and a or b` form.
tokenizer = dir_assets / ("multilingual.tiktoken" if multilingual else "gpt2.tiktoken")

# Keep the file open only for the read; parsing happens afterwards.
with open(tokenizer, "rb") as f:
    contents = f.read()

# Every non-empty line holds two whitespace-separated fields: a base64-encoded
# token and its integer rank. Build {decoded token bytes: rank}; dict insertion
# order follows the line order of the file.
tokens = {
    base64.b64decode(token): int(rank)
    for token, rank in (line.split() for line in contents.splitlines() if line)
}

# Output file is placed in dir_out; its name depends on which vocabulary was
# selected above.
fname_out = Path(dir_out) / (
    "filters_vocab_multilingual.bin" if multilingual else "filters_vocab_en.bin"
)

# File layout (all integers/floats use struct's native "i"/"f" encoding):
#   int   0x5553454E                 leading constant
#   int   filters.shape[0]
#   int   filters.shape[1]
#   float filters values, row by row
#   int   number of tokens
#   then per token: int byte length, followed by the raw token bytes
#
# The context manager guarantees the file is flushed and closed even if one of
# the writes raises, instead of leaking the handle.
with fname_out.open("wb") as fout:
    fout.write(struct.pack("i", 0x5553454E))

    # write mel filters
    n_rows, n_cols = filters.shape[0], filters.shape[1]
    fout.write(struct.pack("i", n_rows))
    fout.write(struct.pack("i", n_cols))
    # One bulk write of the row-major float32 buffer; this yields the same
    # bytes as packing every element with struct.pack("f", ...) in a nested
    # loop, without a Python-level call per element.
    fout.write(filters.numpy().astype(np.float32).tobytes(order="C"))

    # write tokenizer
    fout.write(struct.pack("i", len(tokens)))

    # NOTE(review): only the token bytes are written, in dict iteration order;
    # the ranks are not stored. Presumably the reader derives each token's id
    # from its position - confirm against the consumer of this file.
    for key in tokens:
        fout.write(struct.pack("i", len(key)))
        fout.write(key)

print("Done. Output file: " , fname_out)
print("")