import numpy as np
from fairseq.data.audio.feature_transforms import (
    AudioFeatureTransform,
    register_audio_feature_transform,
)


@register_audio_feature_transform("global_cmvn")
class GlobalCMVN(AudioFeatureTransform):
    """Global CMVN (cepstral mean and variance normalization).

    The global mean and variance need to be pre-computed and stored in
    NumPy format (.npz). The archive must contain the entries ``"mean"``
    and ``"std"``.
    """

    @classmethod
    def from_config_dict(cls, config=None):
        """Build the transform from a config mapping.

        Args:
            config: mapping that may hold the key ``"stats_npz_path"``;
                ``None`` is treated as an empty mapping.

        Returns:
            A ``GlobalCMVN`` built from ``config.get("stats_npz_path")``
            (``None`` when the key is absent).
        """
        _config = {} if config is None else config
        return GlobalCMVN(_config.get("stats_npz_path"))

    def __init__(self, stats_npz_path):
        """Load the pre-computed statistics from ``stats_npz_path``.

        Args:
            stats_npz_path: path to the .npz archive holding the ``"mean"``
                and ``"std"`` arrays.

        Raises:
            KeyError: if the archive has no ``"mean"`` or ``"std"`` entry.
        """
        self.stats_npz_path = stats_npz_path
        # np.load() on an .npz archive returns a lazily-read NpzFile that
        # keeps the underlying file open. Pull both arrays into memory and
        # let the context manager close the archive instead of leaking the
        # handle for every transform instance that gets constructed.
        with np.load(stats_npz_path) as stats:
            self.mean, self.std = stats["mean"], stats["std"]

    def __repr__(self):
        """Return ``ClassName(stats_npz_path="...")`` for debugging."""
        return self.__class__.__name__ + f'(stats_npz_path="{self.stats_npz_path}")'

    def __call__(self, x):
        """Normalize ``x`` with the stored statistics.

        Computes ``(x - mean) / std`` with NumPy broadcasting and returns
        the result as a new array; ``x`` itself is not modified.

        NOTE(review): presumably ``x`` is a (frames, feature_dim) matrix and
        ``mean``/``std`` are per-feature vectors -- confirm against callers.
        """
        x = np.subtract(x, self.mean)
        x = np.divide(x, self.std)
        return x