{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"!git clone https://github.com/openai/whisper.git"
],
"metadata": {
"id": "1p9gHe1Yi3ai"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import io\n",
"import sys\n",
"import json\n",
"import struct\n",
"import base64\n",
"import torch\n",
"import numpy as np\n",
"from pathlib import Path\n",
"\n",
"# SET PARAMETER: True: multilingual False: English only\n",
"multilingual = True\n",
"\n",
"dir_whisper = \"/content/whisper\"\n",
"dir_out = \"/content/\"\n",
"\n",
"# load mel filters\n",
"n_mels = 80\n",
"with np.load(Path(dir_whisper) / \"whisper\" / \"assets\" / \"mel_filters.npz\") as f:\n",
" filters = torch.from_numpy(f[f\"mel_{n_mels}\"])\n",
"\n",
"# load tokenizer\n",
"\n",
"tokenizer = Path(dir_whisper) / \"whisper\" / \"assets\" / (multilingual and \"multilingual.tiktoken\" or \"gpt2.tiktoken\")\n",
"\n",
"with open(tokenizer, \"rb\") as f:\n",
" contents = f.read()\n",
" tokens = {base64.b64decode(token): int(rank) for token, rank in (line.split() for line in contents.splitlines() if line)}\n",
"\n",
"# output in the same directory as the model\n",
"fname_out = Path(dir_out) / (multilingual and \"filters_vocab_multilingual.bin\" or \"filters_vocab_en.bin\")\n",
"\n",
"fout = fname_out.open(\"wb\")\n",
"\n",
"fout.write(struct.pack(\"i\", 0x5553454E))\n",
"# write mel filters\n",
"fout.write(struct.pack(\"i\", filters.shape[0]))\n",
"fout.write(struct.pack(\"i\", filters.shape[1]))\n",
"for i in range(filters.shape[0]):\n",
" for j in range(filters.shape[1]):\n",
" fout.write(struct.pack(\"f\", filters[i][j]))\n",
"\n",
"# write tokenizer\n",
"fout.write(struct.pack(\"i\", len(tokens)))\n",
"\n",
"for key in tokens:\n",
" fout.write(struct.pack(\"i\", len(key)))\n",
" fout.write(key)\n",
"\n",
"fout.close()\n",
"\n",
"print(\"Done. Output file: \" , fname_out)\n",
"print(\"\")"
],
"metadata": {
"id": "oSJIqeknjLqD"
},
"execution_count": null,
"outputs": []
}
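,
{
"cell_type": "code",
"source": [
"# Optional sanity check (a sketch, not part of the original script): read back\n",
"# the file written above and parse the same layout: magic number, filter\n",
"# dimensions, filter data, then the token count. Assumes the cell above has\n",
"# already been run, so fname_out, filters and tokens are still defined.\n",
"import struct\n",
"\n",
"with open(fname_out, \"rb\") as f:\n",
"    magic, = struct.unpack(\"i\", f.read(4))\n",
"    n_mel, n_freq = struct.unpack(\"ii\", f.read(8))\n",
"    f.read(4 * n_mel * n_freq)  # skip the filter coefficients (4 bytes per float)\n",
"    n_vocab, = struct.unpack(\"i\", f.read(4))\n",
"\n",
"assert magic == 0x5553454E\n",
"assert (n_mel, n_freq) == tuple(filters.shape)\n",
"assert n_vocab == len(tokens)\n",
"print(f\"magic: {magic:#x}, filters: {n_mel} x {n_freq}, vocab size: {n_vocab}\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
}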
]
}