DocWolle committed on
Commit b779ac5 · verified · 1 Parent(s): f813885

Upload Create_mel_vocab.ipynb

Files changed (1)
  1. Create_mel_vocab.ipynb +91 -0
Create_mel_vocab.ipynb ADDED
@@ -0,0 +1,91 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "!git clone https://github.com/openai/whisper.git"
+ ],
+ "metadata": {
+ "id": "1p9gHe1Yi3ai"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import io\n",
+ "import sys\n",
+ "import json\n",
+ "import struct\n",
+ "import base64\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# SET PARAMETER: True: multilingual False: English only\n",
+ "multilingual = True\n",
+ "\n",
+ "dir_whisper = \"/content/whisper\"\n",
+ "dir_out = \"/content/\"\n",
+ "\n",
+ "# load mel filters\n",
+ "n_mels = 80\n",
+ "with np.load(Path(dir_whisper) / \"whisper\" / \"assets\" / \"mel_filters.npz\") as f:\n",
+ " filters = torch.from_numpy(f[f\"mel_{n_mels}\"])\n",
+ "\n",
+ "# load tokenizer\n",
+ "\n",
+ "tokenizer = Path(dir_whisper) / \"whisper\" / \"assets\" / (multilingual and \"multilingual.tiktoken\" or \"gpt2.tiktoken\")\n",
+ "\n",
+ "with open(tokenizer, \"rb\") as f:\n",
+ " contents = f.read()\n",
+ " tokens = {base64.b64decode(token): int(rank) for token, rank in (line.split() for line in contents.splitlines() if line)}\n",
+ "\n",
+ "# output in the same directory as the model\n",
+ "fname_out = Path(dir_out) / (multilingual and \"filters_vocab_multilingual.bin\" or \"filters_vocab_en.bin\")\n",
+ "\n",
+ "fout = fname_out.open(\"wb\")\n",
+ "\n",
+ "fout.write(struct.pack(\"i\", 0x5553454E))\n",
+ "# write mel filters\n",
+ "fout.write(struct.pack(\"i\", filters.shape[0]))\n",
+ "fout.write(struct.pack(\"i\", filters.shape[1]))\n",
+ "for i in range(filters.shape[0]):\n",
+ " for j in range(filters.shape[1]):\n",
+ " fout.write(struct.pack(\"f\", filters[i][j]))\n",
+ "\n",
+ "# write tokenizer\n",
+ "fout.write(struct.pack(\"i\", len(tokens)))\n",
+ "\n",
+ "for key in tokens:\n",
+ " fout.write(struct.pack(\"i\", len(key)))\n",
+ " fout.write(key)\n",
+ "\n",
+ "fout.close()\n",
+ "\n",
+ "print(\"Done. Output file: \" , fname_out)\n",
+ "print(\"\")"
+ ],
+ "metadata": {
+ "id": "oSJIqeknjLqD"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+ }
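
For reference, the notebook writes a flat binary layout: a 4-byte magic (0x5553454E), the mel filterbank dimensions and float32 coefficients, then the vocabulary size followed by each token as a length-prefixed byte string. Below is a minimal sketch of a reader that mirrors the writer above; it is not part of the commit. The function name read_filters_vocab and the example path are illustrative, and it assumes struct's native "i"/"f" encoding matches the machine that wrote the file.

    import struct

    def read_filters_vocab(path):
        """Read back a filters_vocab_*.bin produced by the notebook.

        Layout (native byte order, matching struct.pack in the writer):
          int32 magic                 -- expected 0x5553454E
          int32 n_mel, int32 n_fft
          float32 filters[n_mel * n_fft]
          int32 n_vocab
          n_vocab * (int32 length, raw token bytes)
        """
        with open(path, "rb") as f:
            (magic,) = struct.unpack("i", f.read(4))
            assert magic == 0x5553454E, f"unexpected magic: {magic:#x}"

            # mel filterbank: dimensions, then row-major float32 coefficients
            n_mel, n_fft = struct.unpack("ii", f.read(8))
            filters = struct.unpack(f"{n_mel * n_fft}f", f.read(4 * n_mel * n_fft))

            # vocabulary: count, then length-prefixed token byte strings;
            # token rank is implied by position (the writer iterates the
            # tokens dict in insertion order, which follows the tiktoken file)
            (n_vocab,) = struct.unpack("i", f.read(4))
            tokens = []
            for _ in range(n_vocab):
                (length,) = struct.unpack("i", f.read(4))
                tokens.append(f.read(length))

        return n_mel, n_fft, filters, tokens

    n_mel, n_fft, filters, tokens = read_filters_vocab("/content/filters_vocab_multilingual.bin")
    print(n_mel, n_fft, len(tokens))  # Whisper's 80-bin filterbank is 80 x 201

Because the writer emits filters row by row, the flat tuple can be reshaped to (n_mel, n_fft) to recover the original filterbank matrix.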