Upload ./RepCodec/examples/dump_feature.py with huggingface_hub
RepCodec/examples/dump_feature.py
ADDED
@@ -0,0 +1,142 @@
# Copyright (c) ByteDance, Inc. and its affiliates.
# Copyright (c) Chutong Meng
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Based on fairseq (https://github.com/facebookresearch/fairseq)

import logging
import os
import sys

from feature_utils import get_path_iterator, dump_feature

logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("dump_feature")


def main(
        model_type: str,
        tsv_path: str,
        ckpt_path: str,
        whisper_root: str,
        whisper_name: str,
        layer: int,
        nshard: int,
        rank: int,
        feat_dir: str,
        max_chunk: int,
        use_cpu: bool = False
):
    device = "cpu" if use_cpu else "cuda"

    # some checks
    if model_type in ["hubert", "data2vec"]:
        assert ckpt_path and os.path.exists(ckpt_path)
    elif model_type in ["whisper"]:
        assert whisper_name and whisper_root
    else:
        raise ValueError(f"Unsupported model type {model_type}")

    reader = None
    if model_type == "hubert":
        from hubert_feature_reader import HubertFeatureReader
        reader = HubertFeatureReader(ckpt_path, layer, device=device, max_chunk=max_chunk)
    elif model_type == "data2vec":
        from data2vec_feature_reader import Data2vecFeatureReader
        reader = Data2vecFeatureReader(ckpt_path, layer, device=device, max_chunk=max_chunk)
    elif model_type == "whisper":
        from whisper_feature_reader import WhisperFeatureReader
        reader = WhisperFeatureReader(whisper_root, whisper_name, layer, device=device)

    assert reader is not None

    generator, num = get_path_iterator(tsv_path, nshard, rank)
    dump_feature(reader, generator, num, nshard, rank, feat_dir)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_type",
        required=True,
        type=str,
        choices=["data2vec", "hubert", "whisper"],
        help="the type of the speech encoder."
    )
    parser.add_argument(
        "--tsv_path",
        required=True,
        type=str,
        help="the path to the tsv file."
    )
    parser.add_argument(
        "--ckpt_path",
        required=False,
        type=str,
        default=None,
        help="path to the speech model. must provide for HuBERT and data2vec"
    )
    parser.add_argument(
        "--whisper_root",
        required=False,
        type=str,
        default=None,
        help="root dir to download/store whisper model. must provide for whisper model."
    )
    parser.add_argument(
        "--whisper_name",
        required=False,
        type=str,
        default=None,
        help="name of whisper model. e.g., large-v2. must provide for whisper model."
    )
    parser.add_argument(
        "--layer",
        required=True,
        type=int,
        help="which layer of the model. this is 1-based."
    )
    parser.add_argument(
        "--feat_dir",
        required=True,
        type=str,
        help="the output dir to save the representations."
    )
    parser.add_argument(
        "--nshard",
        required=False,
        type=int,
        default=1,
        help="total number of shards."
    )
    parser.add_argument(
        "--rank",
        required=False,
        type=int,
        default=0,
        help="shard id of this process."
    )
    parser.add_argument(
        "--max_chunk",
        type=int,
        default=1600000,
        help="max number of frames of each batch."
    )
    parser.add_argument(
        "--use_cpu",
        default=False,
        action="store_true",
        help="whether use cpu instead of gpu."
    )
    args = parser.parse_args()
    logger.info(args)

    main(**vars(args))
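For reference, a minimal usage sketch of this script. The tsv manifest, checkpoint, and output paths below are hypothetical placeholders, not part of this upload; it simply calls main() with the same values the CLI flags would supply, dumping features from one HuBERT layer for a single shard.

# Roughly equivalent to:
#   python dump_feature.py --model_type hubert --tsv_path /data/train.tsv \
#       --ckpt_path /models/hubert_base_ls960.pt --layer 9 --feat_dir /data/feats
from dump_feature import main

main(
    model_type="hubert",                        # one of: data2vec, hubert, whisper
    tsv_path="/data/train.tsv",                 # hypothetical manifest listing audio files
    ckpt_path="/models/hubert_base_ls960.pt",   # hypothetical checkpoint path
    whisper_root=None,                          # only needed when model_type == "whisper"
    whisper_name=None,
    layer=9,                                    # 1-based layer index
    nshard=1,                                   # process the whole manifest as one shard
    rank=0,
    feat_dir="/data/feats",                     # output directory for dumped features
    max_chunk=1600000,
    use_cpu=False,                              # set True to run without a GPU
)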