KingNish committed · Commit 4cda277 · verified · 1 Parent(s): 5af1f06

Upload ./RepCodec/examples/dump_feature.py with huggingface_hub
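The commit message says the file was pushed with huggingface_hub. For context, below is a minimal sketch of how such an upload is typically done with that library's upload_file API; the repo_id, repo_type, and token handling are placeholder assumptions, not details taken from this commit.

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="./RepCodec/examples/dump_feature.py",  # local file to push
    path_in_repo="RepCodec/examples/dump_feature.py",       # destination path inside the repo
    repo_id="user/repo",   # placeholder: the target repo is not named in this commit view
    repo_type="model",     # placeholder: could equally be "dataset" or "space"
    commit_message="Upload ./RepCodec/examples/dump_feature.py with huggingface_hub",
)

A call like this creates a single commit on the Hub with the given commit message, which is what the entry above records.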

Files changed (1)
  1. RepCodec/examples/dump_feature.py +142 -0
RepCodec/examples/dump_feature.py ADDED
@@ -0,0 +1,142 @@
+ # Copyright (c) ByteDance, Inc. and its affiliates.
+ # Copyright (c) Chutong Meng
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+ # Based on fairseq (https://github.com/facebookresearch/fairseq)
+
+ import logging
+ import os
+ import sys
+
+ from feature_utils import get_path_iterator, dump_feature
+
+ logging.basicConfig(
+     format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+     datefmt="%Y-%m-%d %H:%M:%S",
+     level=os.environ.get("LOGLEVEL", "INFO").upper(),
+     stream=sys.stdout,
+ )
+ logger = logging.getLogger("dump_feature")
+
+
+ def main(
+         model_type: str,
+         tsv_path: str,
+         ckpt_path: str,
+         whisper_root: str,
+         whisper_name: str,
+         layer: int,
+         nshard: int,
+         rank: int,
+         feat_dir: str,
+         max_chunk: int,
+         use_cpu: bool = False
+ ):
+     device = "cpu" if use_cpu else "cuda"
+
+     # some checks
+     if model_type in ["hubert", "data2vec"]:
+         assert ckpt_path and os.path.exists(ckpt_path)
+     elif model_type in ["whisper"]:
+         assert whisper_name and whisper_root
+     else:
+         raise ValueError(f"Unsupported model type {model_type}")
+
+     reader = None
+     if model_type == "hubert":
+         from hubert_feature_reader import HubertFeatureReader
+         reader = HubertFeatureReader(ckpt_path, layer, device=device, max_chunk=max_chunk)
+     elif model_type == "data2vec":
+         from data2vec_feature_reader import Data2vecFeatureReader
+         reader = Data2vecFeatureReader(ckpt_path, layer, device=device, max_chunk=max_chunk)
+     elif model_type == "whisper":
+         from whisper_feature_reader import WhisperFeatureReader
+         reader = WhisperFeatureReader(whisper_root, whisper_name, layer, device=device)
+
+     assert reader is not None
+
+     generator, num = get_path_iterator(tsv_path, nshard, rank)
+     dump_feature(reader, generator, num, nshard, rank, feat_dir)
+
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--model_type",
+         required=True,
+         type=str,
+         choices=["data2vec", "hubert", "whisper"],
+         help="the type of the speech encoder."
+     )
+     parser.add_argument(
+         "--tsv_path",
+         required=True,
+         type=str,
+         help="the path to the tsv file."
+     )
+     parser.add_argument(
+         "--ckpt_path",
+         required=False,
+         type=str,
+         default=None,
+         help="path to the speech model. must provide for HuBERT and data2vec"
+     )
+     parser.add_argument(
+         "--whisper_root",
+         required=False,
+         type=str,
+         default=None,
+         help="root dir to download/store whisper model. must provide for whisper model."
+     )
+     parser.add_argument(
+         "--whisper_name",
+         required=False,
+         type=str,
+         default=None,
+         help="name of whisper model. e.g., large-v2. must provide for whisper model."
+     )
+     parser.add_argument(
+         "--layer",
+         required=True,
+         type=int,
+         help="which layer of the model. this is 1-based."
+     )
+     parser.add_argument(
+         "--feat_dir",
+         required=True,
+         type=str,
+         help="the output dir to save the representations."
+     )
+     parser.add_argument(
+         "--nshard",
+         required=False,
+         type=int,
+         default=1,
+         help="total number of shards."
+     )
+     parser.add_argument(
+         "--rank",
+         required=False,
+         type=int,
+         default=0,
+         help="shard id of this process."
+     )
+     parser.add_argument(
+         "--max_chunk",
+         type=int,
+         default=1600000,
+         help="max number of frames of each batch."
+     )
+     parser.add_argument(
+         "--use_cpu",
+         default=False,
+         action="store_true",
+         help="whether to use cpu instead of gpu."
+     )
+     args = parser.parse_args()
+     logger.info(args)
+
+     main(**vars(args))
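For completeness, here is a hedged sketch of driving this script's main() directly for sharded feature extraction. It assumes the RepCodec examples directory is importable and uses hypothetical checkpoint, manifest, and output paths; normally each shard would instead be launched as its own process through the command-line flags defined above.

import sys

sys.path.append("RepCodec/examples")  # assumption: running from the repository root
from dump_feature import main

NSHARD = 2  # hypothetical shard count; ranks 0..NSHARD-1 each process their own slice of the manifest

for rank in range(NSHARD):
    main(
        model_type="hubert",
        tsv_path="manifest/train.tsv",            # hypothetical fairseq-style manifest
        ckpt_path="checkpoints/hubert_base.pt",   # hypothetical HuBERT checkpoint
        whisper_root=None,                        # unused for hubert
        whisper_name=None,                        # unused for hubert
        layer=9,                                  # 1-based layer index, per the --layer help text
        nshard=NSHARD,
        rank=rank,
        feat_dir="features/train",                # hypothetical output directory
        max_chunk=1600000,
        use_cpu=True,                             # keeps the sketch runnable without a GPU
    )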