{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!git clone https://github.com/openai/whisper.git"
      ],
      "metadata": {
        "id": "1p9gHe1Yi3ai"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import io\n",
        "import sys\n",
        "import json\n",
        "import struct\n",
        "import base64\n",
        "import torch\n",
        "import numpy as np\n",
        "from pathlib import Path\n",
        "\n",
        "# SET PARAMETER: True: multilingual False: English only\n",
        "multilingual = True\n",
        "\n",
        "dir_whisper = \"/content/whisper\"\n",
        "dir_out = \"/content/\"\n",
        "\n",
        "# load mel filters\n",
        "n_mels = 80\n",
        "with np.load(Path(dir_whisper) / \"whisper\" / \"assets\" / \"mel_filters.npz\") as f:\n",
        "    filters = torch.from_numpy(f[f\"mel_{n_mels}\"])\n",
        "\n",
        "# load tokenizer\n",
        "\n",
        "tokenizer = Path(dir_whisper) / \"whisper\" / \"assets\" / (multilingual and \"multilingual.tiktoken\" or \"gpt2.tiktoken\")\n",
        "\n",
        "with open(tokenizer, \"rb\") as f:\n",
        "    contents = f.read()\n",
        "    tokens = {base64.b64decode(token): int(rank) for token, rank in (line.split() for line in contents.splitlines() if line)}\n",
        "\n",
        "# output in the same directory as the model\n",
        "fname_out = Path(dir_out) / (multilingual and \"filters_vocab_multilingual.bin\" or \"filters_vocab_en.bin\")\n",
        "\n",
        "fout = fname_out.open(\"wb\")\n",
        "\n",
        "fout.write(struct.pack(\"i\", 0x5553454E))\n",
        "# write mel filters\n",
        "fout.write(struct.pack(\"i\", filters.shape[0]))\n",
        "fout.write(struct.pack(\"i\", filters.shape[1]))\n",
        "for i in range(filters.shape[0]):\n",
        "    for j in range(filters.shape[1]):\n",
        "        fout.write(struct.pack(\"f\", filters[i][j]))\n",
        "\n",
        "# write tokenizer\n",
        "fout.write(struct.pack(\"i\", len(tokens)))\n",
        "\n",
        "for key in tokens:\n",
        "    fout.write(struct.pack(\"i\", len(key)))\n",
        "    fout.write(key)\n",
        "\n",
        "fout.close()\n",
        "\n",
        "print(\"Done. Output file: \" , fname_out)\n",
        "print(\"\")"
      ],
      "metadata": {
        "id": "oSJIqeknjLqD"
      },
      "execution_count": null,
      "outputs": []
    }
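    ,
    {
      "cell_type": "markdown",
      "source": [
        "A quick sanity check, not part of the original conversion flow: the cell below re-reads `fname_out` assuming the layout written above, i.e. an int32 magic, the filter dimensions and float32 coefficients, then the vocabulary size and the length-prefixed token bytes."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Sanity check: parse the file written above and report its contents.\n",
        "# Layout assumed from the writer cell: int32 magic, int32 rows, int32 cols,\n",
        "# rows*cols float32 coefficients, int32 n_vocab, then per token an int32\n",
        "# length followed by that many raw bytes.\n",
        "import struct\n",
        "\n",
        "with open(fname_out, \"rb\") as f:\n",
        "    magic, rows, cols = struct.unpack(\"iii\", f.read(12))\n",
        "    assert magic == 0x5553454E, f\"bad magic: {magic:#x}\"\n",
        "    f.read(4 * rows * cols)  # skip the float32 filter coefficients\n",
        "    (n_vocab,) = struct.unpack(\"i\", f.read(4))\n",
        "    vocab = []\n",
        "    for _ in range(n_vocab):\n",
        "        (length,) = struct.unpack(\"i\", f.read(4))\n",
        "        vocab.append(f.read(length))\n",
        "\n",
        "print(f\"mel filters: {rows} x {cols}, vocab size: {n_vocab}\")\n",
        "print(\"first tokens:\", vocab[:5])"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    }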
  ]
}