{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Export Whisper mel filters and vocabulary to a binary file\n",
        "\n",
        "Clones `openai/whisper`, reads the mel filter bank and the tokenizer vocabulary from its `whisper/assets` folder, and writes both into a single `.bin` file under `/content/`.\n",
        "\n",
        "Output layout (written with `struct` formats `i` and `f`, i.e. native byte order):\n",
        "\n",
        "1. magic number `0x5553454E`\n",
        "2. mel filter shape: rows, then columns\n",
        "3. mel filter values as 32-bit floats, row by row\n",
        "4. token count\n",
        "5. for each token: byte length, then the raw token bytes"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Clone Whisper into /content/whisper; skip when a checkout already exists so the cell can be re-run.\n",
        "!test -d /content/whisper/.git || git clone https://github.com/openai/whisper.git /content/whisper"
      ],
      "metadata": {
        "id": "1p9gHe1Yi3ai"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import io\n",
        "import sys\n",
        "import json\n",
        "import struct\n",
        "import base64\n",
        "import torch\n",
        "import numpy as np\n",
        "from pathlib import Path\n",
        "\n",
        "# SET PARAMETER: True: multilingual False: English only\n",
        "multilingual = True\n",
        "\n",
        "dir_whisper = \"/content/whisper\"\n",
        "dir_out = \"/content/\"\n",
        "\n",
        "# First value written to the output file; its hex digits spell \"USEN\" in ASCII.\n",
        "MAGIC = 0x5553454E\n",
        "\n",
        "dir_assets = Path(dir_whisper) / \"whisper\" / \"assets\"\n",
        "\n",
        "# load mel filters\n",
        "n_mels = 80\n",
        "with np.load(dir_assets / \"mel_filters.npz\") as f:\n",
        "    filters = torch.from_numpy(f[f\"mel_{n_mels}\"])\n",
        "\n",
        "# load tokenizer\n",
        "# Each non-empty line of the .tiktoken file is parsed as: <base64 token> <integer rank>\n",
        "tokenizer = dir_assets / (\"multilingual.tiktoken\" if multilingual else \"gpt2.tiktoken\")\n",
        "\n",
        "with open(tokenizer, \"rb\") as f:\n",
        "    contents = f.read()\n",
        "tokens = {\n",
        "    base64.b64decode(token): int(rank)\n",
        "    for token, rank in (line.split() for line in contents.splitlines() if line)\n",
        "}\n",
        "\n",
        "# Write tokens in ascending rank order instead of relying on file order.\n",
        "# NOTE(review): presumably the reader assigns token ids by position - confirm against the consumer.\n",
        "# For a file that is already listed in rank order this is a no-op.\n",
        "tokens_by_rank = sorted(tokens, key=tokens.get)\n",
        "\n",
        "# output in the same directory as the model\n",
        "fname_out = Path(dir_out) / (\"filters_vocab_multilingual.bin\" if multilingual else \"filters_vocab_en.bin\")\n",
        "\n",
        "# One row-major float32 buffer; same bytes as packing every element with struct format \"f\".\n",
        "filter_bytes = filters.numpy().astype(np.float32).tobytes()\n",
        "\n",
        "# The context manager closes the file even if a write fails part-way.\n",
        "with fname_out.open(\"wb\") as fout:\n",
        "    fout.write(struct.pack(\"i\", MAGIC))\n",
        "\n",
        "    # write mel filters: rows, columns, then the values\n",
        "    fout.write(struct.pack(\"i\", filters.shape[0]))\n",
        "    fout.write(struct.pack(\"i\", filters.shape[1]))\n",
        "    fout.write(filter_bytes)\n",
        "\n",
        "    # write tokenizer: count, then (length, bytes) per token\n",
        "    fout.write(struct.pack(\"i\", len(tokens)))\n",
        "    for key in tokens_by_rank:\n",
        "        fout.write(struct.pack(\"i\", len(key)))\n",
        "        fout.write(key)\n",
        "\n",
        "print(\"Done. Output file: \" , fname_out)\n",
        "print(\"\")"
      ],
      "metadata": {
        "id": "oSJIqeknjLqD"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}