tlemagueresse committed on
Commit
02c69cc
·
1 Parent(s): 06e8ee0

[WIP] Refactoring for easy model import from HF

Browse files
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_class": "FastModelHuggingFace",
3
+ "framework": "PyTorch + LightGBM",
4
+ "audio_processing_params": {
5
+ "sample_rate": 12000,
6
+ "duration": 3,
7
+ "padding_method": "reflect"
8
+ },
9
+ "features_params": {
10
+ "n_fft": 512,
11
+ "hop_length": 256,
12
+ "pad": 0,
13
+ "power": 2,
14
+ "pad_mode": "reflect",
15
+ "f_min": 70,
16
+ "f_max": 1525,
17
+ "fc_min": 0.05,
18
+ "fc_max": 0.8
19
+ },
20
+ "lgbm_params": {
21
+ "objective": "binary",
22
+ "metric": "binary_logloss",
23
+ "boosting_type": "gbdt",
24
+ "learning_rate": 0.1,
25
+ "num_leaves": 75,
26
+ "max_depth": -1,
27
+ "feature_fraction": 0.8,
28
+ "bagging_fraction": 0.8,
29
+ "bagging_freq": 5,
30
+ "verbosity": -1
31
+ }
32
+
33
+ }
example_usage_fastmodel.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from codecarbon import EmissionsTracker
5
+ from datasets import load_dataset
6
+ from sklearn.metrics import accuracy_score
7
+
8
+ from fast_model import FastModel, save_pipeline
9
+
10
# Fetch the benchmark dataset and keep handles on both splits.
dataset = load_dataset("rfcx/frugalai")
train_dataset = dataset["train"]
test_dataset = dataset["test"]

tracker = EmissionsTracker(allow_multiple_runs=True)

# The three parameter groups of the model all live in config.json.
with open("config.json", "r") as file:
    config = json.load(file)

audio_processing_params = config["audio_processing_params"]
features_params = config["features_params"]
lgbm_params = config["lgbm_params"]

# TRAINING
model = FastModel(audio_processing_params, features_params, lgbm_params)
model.fit(train_dataset)

# INFERENCE (only this part is measured by the emissions tracker)
tracker.start()
tracker.start_task("inference")
true_label = test_dataset["label"]
predictions = model.predict(test_dataset)
emissions_data = tracker.stop_task()

# Report accuracy, then energy and emissions scaled by 1000.
accuracy = accuracy_score(true_label, predictions)
print(accuracy)
print("energy_consumed_wh", emissions_data.energy_consumed * 1000)
print("emissions_gco2eq", emissions_data.emissions * 1000)

# Write the LightGBM model file and the pickled pipeline to the current directory.
output_dir = Path("./")
save_pipeline(model, output_dir)
example_usage_fastmodel_hf.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torchaudio
2
+ from datasets import load_dataset
3
+ from sklearn.metrics import accuracy_score
4
+ from fast_model import FastModelHuggingFace
5
+
6
repo_id = "tlmk22/QuefrencyGuardian"
fast_model = FastModelHuggingFace.from_pretrained(repo_id)

# Example: predicting on a single WAV file.
# `predict` expects the *path* of the WAV file: for a string input it loads the
# file, averages the channels and resamples to the pipeline's sample rate on its
# own. A raw waveform tensor is neither a path nor a Hugging Face dataset and is
# rejected with a ValueError, so no manual loading/resampling is done here.
wav_path = "wave_example/chainsaw.wav"
map_labels = {0: "chainsaw", 1: "environment"}
wav_prediction = fast_model.predict(wav_path)
# `predict` returns a NumPy array; convert its single entry to a plain int
# before using it as a dictionary key (arrays are not hashable).
print(f"Prediction : {map_labels[int(wav_prediction[0])]}")

# Example: predicting on a Hugging Face dataset
dataset = load_dataset("rfcx/frugalai")
test_dataset = dataset["test"]
true_label = test_dataset["label"]

predictions = fast_model.predict(test_dataset)
print(accuracy_score(true_label, predictions))
fast_model.py CHANGED
@@ -1,15 +1,20 @@
1
  import os
2
  import struct
3
  import pickle
 
 
4
 
5
  import numpy as np
6
  import torch
7
  import lightgbm as lgb
8
  import torchaudio
 
9
  from sklearn.exceptions import NotFittedError
 
10
  from torchaudio.transforms import Spectrogram
11
  import torch.nn.functional as F
12
  from datasets.formatting import query_table
 
13
  import warnings
14
 
15
  warnings.filterwarnings("ignore")
@@ -21,7 +26,7 @@ class FastModel:
21
  """
22
  A class designed for training and predicting using LightGBM, incorporating spectral and cepstral features.
23
 
24
- ### Workflow:
25
  1. Batch Loading and Decoding:
26
  Load audio data in batches directly from a table and decode byte-encoded information.
27
 
@@ -36,85 +41,39 @@ class FastModel:
36
  3. Model Application:
37
  Use the extracted features as input for the LightGBM model to perform predictions.
38
 
39
- ### Options for Energy Optimization:
40
- - Feature Selection:
41
- Mask less significant features to reduce computation.
42
- - Signal Truncation:
43
- Process only a limited duration (e.g., a few seconds) of the audio signal.
44
- - Hardware Acceleration:
45
- Utilize CUDA to speed up feature computation when supported.
46
-
47
  Attributes
48
  ----------
 
 
49
  feature_params : dict
50
- Parameters for configuring the MelSpectrogram transformation during training.
51
  lgbm_params : dict, optional
52
  Parameters for configuring the LightGBM model.
53
- model_file : str
54
- Path for saving or loading the trained LightGBM model.
55
- padding_method : str
56
- Padding method to apply when the waveform size is smaller than the desired size.
57
- waveform_duration : float
58
- Duration of the audio waveform to process, in seconds.
59
- mask_features : bool
60
- Whether to enable feature masking for dimensionality reduction.
61
- mask_file : str
62
- Path to save or load the feature mask file.
63
- mask_ratio : float
64
- The ratio of features to retain when feature masking is applied.
65
- batch_size : int
66
- Number of samples per batch during training and prediction.
67
- apply_offset_on_fit : bool
68
- Whether to apply the offset on fit. Useful if waveform_duration is below than 3 seconds.
69
  device : str
70
  Device used for computation ("cpu" or "cuda").
71
-
72
- Methods
73
- -------
74
- _save_feature_mask(model, n_features, ratio):
75
- Saves the most important features as a mask.
76
- _load_feature_mask():
77
- Loads the feature mask from the saved file.
78
- fit(dataset):
79
- Trains the LightGBM model on audio features extracted from the dataset.
80
- predict(dataset, get_proba=False):
81
- Predicts labels or probabilities for a dataset using the trained model.
82
- get_features(audios, spectrogram_transformer, cepstral_transformer):
83
- Extracts features from raw audio using spectrogram and cepstral transformations.
84
  """
85
 
86
  def __init__(
87
  self,
88
- feature_params,
89
- lgbm_params=None,
90
- padding_method="reflect",
91
- waveform_duration=3,
92
- model_file=None,
93
- mask_features=False,
94
- mask_file="feature_mask.pkl",
95
- mask_ratio=0.25,
96
- batch_size=5000,
97
- apply_offset_on_fit=False,
98
- device="cpu",
99
  ):
 
100
  self.feature_params = feature_params
101
  self.lgbm_params = lgbm_params
102
- self.model_file = model_file
103
- self.padding_method = padding_method
104
- self.waveform_duration = waveform_duration
105
- self.mask_features = mask_features
106
- self.mask_file = mask_file
107
- self.mask_ratio = mask_ratio
108
- self.batch_size = batch_size
109
- self.apply_offset_on_fit = apply_offset_on_fit
110
  self.device = torch.device(
111
  "cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
112
  )
 
 
 
113
  self.spectrogram_transformer = Spectrogram(
114
  n_fft=self.feature_params["n_fft"],
115
  hop_length=self.feature_params["hop_length"],
116
  pad=self.feature_params["pad"],
117
- window_fn=self.feature_params["win_spectrogram"],
118
  power=self.feature_params["power"],
119
  pad_mode=self.feature_params["pad_mode"],
120
  onesided=True,
@@ -130,7 +89,7 @@ class FastModel:
130
  n_fft=self.n_fft_cepstral,
131
  hop_length=self.n_fft_cepstral,
132
  pad=0,
133
- window_fn=self.feature_params["win_cepstral"],
134
  power=self.feature_params["power"],
135
  pad_mode=self.feature_params["pad_mode"],
136
  onesided=True,
@@ -142,27 +101,15 @@ class FastModel:
142
  device=self.device,
143
  )
144
 
145
- def _save_feature_mask(self, model, n_features, ratio):
146
- feature_importance = model.feature_importance(importance_type="gain")
147
- sorted_indices = np.argsort(feature_importance)[::-1]
148
- top_indices = sorted_indices[: max(1, int(n_features * ratio))]
149
- mask = np.zeros(n_features, dtype=bool)
150
- mask[top_indices] = True
151
- with open(self.mask_file, "wb") as f:
152
- pickle.dump(mask, f)
153
-
154
- def _load_feature_mask(self):
155
- with open(self.mask_file, "rb") as f:
156
- return pickle.load(f)
157
-
158
- def fit(self, dataset):
159
- """
160
- Trains a LightGBM model on features extracted from the dataset.
161
 
162
  Parameters
163
  ----------
164
  dataset : Dataset
165
- Dataset object containing audio samples and their corresponding labels.
 
 
166
 
167
  Raises
168
  ------
@@ -170,36 +117,22 @@ class FastModel:
170
  If the dataset is empty or invalid.
171
  """
172
  features, labels = [], []
173
- offsets = [0, 12000, 24000] if self.apply_offset_on_fit else [0]
174
- for offset in offsets:
175
- for audio, label in batch_audio_loader(
176
- dataset,
177
- waveform_duration=self.waveform_duration,
178
- batch_size=self.batch_size,
179
- padding_method=self.padding_method,
180
- offset=offset,
181
- ):
182
- feature = self.get_features(
183
- audio, self.spectrogram_transformer, self.cepstral_transformer
184
- )
185
- features.append(feature)
186
- labels.extend(label)
187
  x_train = torch.cat(features, dim=0)
188
  train_data = lgb.Dataset(x_train.cpu(), label=labels)
189
- model = lgb.train(self.lgbm_params, train_data)
190
-
191
- if self.mask_features:
192
- self._save_feature_mask(model, x_train.shape[1], self.mask_ratio)
193
- mask = self._load_feature_mask()
194
- x_train = x_train[:, mask]
195
- train_data = lgb.Dataset(x_train.cpu(), label=labels)
196
- model = lgb.train(self.lgbm_params, train_data)
197
-
198
- model.save_model(self.model_file)
199
 
200
- def predict(self, dataset, get_proba=False):
201
- """
202
- Predicts labels or probabilities for a dataset using the trained model.
203
 
204
  Parameters
205
  ----------
@@ -207,6 +140,8 @@ class FastModel:
207
  The dataset containing audio data for prediction.
208
  get_proba : bool, optional
209
  If True, returns class probabilities rather than binary predictions (default is False).
 
 
210
 
211
  Returns
212
  -------
@@ -218,49 +153,34 @@ class FastModel:
218
  ------
219
  NotFittedError
220
  If the model is not yet trained.
221
- FileNotFoundError
222
- If the model file does not exist.
223
  """
224
- if not self.model_file:
225
- raise NotFittedError("The model is not trained yet. Train using the `fit` method.")
226
- if not os.path.isfile(self.model_file):
227
- raise FileNotFoundError(f"Model file {self.model_file} not found.")
228
-
229
  features = []
230
  for audio, _ in batch_audio_loader(
231
  dataset,
232
- waveform_duration=self.waveform_duration,
233
- batch_size=self.batch_size,
234
- padding_method=self.padding_method,
 
235
  ):
236
- feature = self.get_features(
237
- audio, self.spectrogram_transformer, self.cepstral_transformer
238
- )
239
  features.append(feature)
240
  features = torch.cat(features, dim=0)
241
  torch.cuda.empty_cache()
242
 
243
- if self.mask_features:
244
- mask = self._load_feature_mask()
245
- features = features[:, mask]
246
-
247
- model = lgb.Booster(model_file=self.model_file)
248
- y_score = model.predict(features.cpu())
249
 
250
  return y_score if get_proba else (y_score >= 0.5).astype(int)
251
 
252
- def get_features(self, audios, spectrogram_transformer, cepstral_transformer):
253
  """
254
  Extracts features from raw audio using spectrogram and cepstrum transformations.
255
 
256
  Parameters
257
  ----------
258
  audios : torch.Tensor
259
- A batch of audio waveforms as 1D tensors.
260
- spectrogram_transformer : Spectrogram
261
- Transformation used to compute MelSpectrogram features.
262
- cepstral_transformer : Spectrogram
263
- Transformation used to compute cepstral features.
264
 
265
  Returns
266
  -------
@@ -273,9 +193,9 @@ class FastModel:
273
  If the input audio tensor is empty or invalid.
274
  """
275
  audios = audios.to(self.device)
276
- sxx = spectrogram_transformer(audios) # shape : (n_audios, n_f, n_blocks)
277
  sxx = torch.log10(torch.clamp(sxx.permute(0, 2, 1), min=1e-10))
278
- cepstral_mat = cepstral_transformer(sxx[:, :, self.ind_f_filtered]).squeeze(dim=3)[
279
  :, :, self.ind_cf_filtered
280
  ]
281
 
@@ -289,22 +209,21 @@ class FastModel:
289
 
290
 
291
  def batch_audio_loader(
292
- dataset,
293
- waveform_duration=3,
294
- batch_size=1,
295
- sr=12000,
296
- device="cpu",
297
- padding_method=None,
298
- offset=0,
299
  ):
300
- """
301
- Loads and preprocesses audio data from a dataset for training or inference in batches.
302
 
303
  Parameters
304
  ----------
305
  dataset : Dataset
306
  The dataset containing audio samples and labels.
307
- waveform_duration : float, optional
308
  Desired duration of the audio waveforms in seconds (default is 3).
309
  batch_size : int, optional
310
  Number of audio samples per batch (default is 1).
@@ -319,10 +238,10 @@ def batch_audio_loader(
319
 
320
  Yields
321
  ------
322
- tuple
323
  A tuple (batch_audios, batch_labels), where:
324
- - batch_audios is a tensor of processed audio waveforms.
325
- - batch_labels is a tensor of corresponding audio labels.
326
 
327
  Raises
328
  ------
@@ -399,7 +318,11 @@ def batch_audio_loader(
399
  yield batch_audios_on_device, batch_labels_on_device
400
 
401
 
402
- def apply_padding(waveform, output_size, padding_method="zero"):
 
 
 
 
403
  """
404
  Applies padding to the waveform when its size is smaller than the desired output size.
405
 
@@ -432,3 +355,223 @@ def apply_padding(waveform, output_size, padding_method="zero"):
432
 
433
  return F.pad(waveform.unsqueeze(0), (0, total_pad), mode=padding_method).squeeze()
434
  raise ValueError(f"Invalid padding method: {padding_method}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import struct
3
  import pickle
4
+ from pathlib import Path
5
+ from typing import Literal, Union
6
 
7
  import numpy as np
8
  import torch
9
  import lightgbm as lgb
10
  import torchaudio
11
+ from huggingface_hub import hf_hub_download
12
  from sklearn.exceptions import NotFittedError
13
+ from torch import Tensor
14
  from torchaudio.transforms import Spectrogram
15
  import torch.nn.functional as F
16
  from datasets.formatting import query_table
17
+ from datasets import Dataset
18
  import warnings
19
 
20
  warnings.filterwarnings("ignore")
 
26
  """
27
  A class designed for training and predicting using LightGBM, incorporating spectral and cepstral features.
28
 
29
+ Workflow:
30
  1. Batch Loading and Decoding:
31
  Load audio data in batches directly from a table and decode byte-encoded information.
32
 
 
41
  3. Model Application:
42
  Use the extracted features as input for the LightGBM model to perform predictions.
43
 
 
 
 
 
 
 
 
 
44
  Attributes
45
  ----------
46
+ audio_processing_params : dict
47
+ Parameters for configuring audio processing.
48
  feature_params : dict
49
+ Parameters for configuring the Spectrogram and Cepstrogram transformation.
50
  lgbm_params : dict, optional
51
  Parameters for configuring the LightGBM model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  device : str
53
  Device used for computation ("cpu" or "cuda").
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  """
55
 
56
  def __init__(
57
  self,
58
+ audio_processing_params: dict,
59
+ feature_params: dict,
60
+ lgbm_params: dict,
61
+ device: str = "cuda",
 
 
 
 
 
 
 
62
  ):
63
+ self.audio_processing_params = audio_processing_params
64
  self.feature_params = feature_params
65
  self.lgbm_params = lgbm_params
 
 
 
 
 
 
 
 
66
  self.device = torch.device(
67
  "cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
68
  )
69
+ self.model = None
70
+
71
+ # Initialize Spectrogram & Cepstrogram
72
  self.spectrogram_transformer = Spectrogram(
73
  n_fft=self.feature_params["n_fft"],
74
  hop_length=self.feature_params["hop_length"],
75
  pad=self.feature_params["pad"],
76
+ window_fn=torch.hamming_window,
77
  power=self.feature_params["power"],
78
  pad_mode=self.feature_params["pad_mode"],
79
  onesided=True,
 
89
  n_fft=self.n_fft_cepstral,
90
  hop_length=self.n_fft_cepstral,
91
  pad=0,
92
+ window_fn=torch.hamming_window,
93
  power=self.feature_params["power"],
94
  pad_mode=self.feature_params["pad_mode"],
95
  onesided=True,
 
101
  device=self.device,
102
  )
103
 
104
+ def fit(self, dataset: Dataset, batch_size: int = 5000):
105
+ """Trains a LightGBM model on features extracted from the dataset.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  Parameters
108
  ----------
109
  dataset : Dataset
110
+ Arrow Dataset object containing audio samples and their corresponding labels.
111
+ batch_size : int, optional
112
+ Number of audio samples per batch (default is 5000).
113
 
114
  Raises
115
  ------
 
117
  If the dataset is empty or invalid.
118
  """
119
  features, labels = [], []
120
+ for audio, label in batch_audio_loader(
121
+ dataset,
122
+ waveform_duration=self.audio_processing_params["duration"],
123
+ batch_size=batch_size,
124
+ padding_method=self.audio_processing_params["padding_method"],
125
+ device=self.device,
126
+ ):
127
+ feature = self.get_features(audio)
128
+ features.append(feature)
129
+ labels.extend(label)
 
 
 
 
130
  x_train = torch.cat(features, dim=0)
131
  train_data = lgb.Dataset(x_train.cpu(), label=labels)
132
+ self.model = lgb.train(self.lgbm_params, train_data)
 
 
 
 
 
 
 
 
 
133
 
134
+ def predict(self, dataset: Dataset, get_proba: bool = False, batch_size: int = 5000):
135
+ """Predicts labels or probabilities for a dataset using the trained model.
 
136
 
137
  Parameters
138
  ----------
 
140
  The dataset containing audio data for prediction.
141
  get_proba : bool, optional
142
  If True, returns class probabilities rather than binary predictions (default is False).
143
+ batch_size : int, optional
144
+ Number of audio samples per batch (default is 5000).
145
 
146
  Returns
147
  -------
 
153
  ------
154
  NotFittedError
155
  If the model is not yet trained.
 
 
156
  """
157
+ if not self.model:
158
+ raise NotFittedError("LGBM model is not fitted yet.")
 
 
 
159
  features = []
160
  for audio, _ in batch_audio_loader(
161
  dataset,
162
+ waveform_duration=self.audio_processing_params["duration"],
163
+ batch_size=batch_size,
164
+ padding_method=self.audio_processing_params["padding_method"],
165
+ device=self.device,
166
  ):
167
+ feature = self.get_features(audio)
 
 
168
  features.append(feature)
169
  features = torch.cat(features, dim=0)
170
  torch.cuda.empty_cache()
171
 
172
+ y_score = self.model.predict(features.cpu())
 
 
 
 
 
173
 
174
  return y_score if get_proba else (y_score >= 0.5).astype(int)
175
 
176
+ def get_features(self, audios: Tensor):
177
  """
178
  Extracts features from raw audio using spectrogram and cepstrum transformations.
179
 
180
  Parameters
181
  ----------
182
  audios : torch.Tensor
183
+ A batch of audio waveforms as 2D tensors (n_audios, n_samples_per_audio).
 
 
 
 
184
 
185
  Returns
186
  -------
 
193
  If the input audio tensor is empty or invalid.
194
  """
195
  audios = audios.to(self.device)
196
+ sxx = self.spectrogram_transformer(audios) # shape : (n_audios, n_f, n_blocks)
197
  sxx = torch.log10(torch.clamp(sxx.permute(0, 2, 1), min=1e-10))
198
+ cepstral_mat = self.cepstral_transformer(sxx[:, :, self.ind_f_filtered]).squeeze(dim=3)[
199
  :, :, self.ind_cf_filtered
200
  ]
201
 
 
209
 
210
 
211
  def batch_audio_loader(
212
+ dataset: Dataset,
213
+ waveform_duration: int = 3,
214
+ batch_size: int = 1,
215
+ sr: int = 12000,
216
+ device: Literal["cpu", "cuda"] = "cpu",
217
+ padding_method: None | Literal["zero", "reflect", "replicate", "circular"] = None,
218
+ offset: int = 0,
219
  ):
220
+ """Optimized loader for audio data from a dataset for training or inference in batches.
 
221
 
222
  Parameters
223
  ----------
224
  dataset : Dataset
225
  The dataset containing audio samples and labels.
226
+ waveform_duration : int, optional
227
  Desired duration of the audio waveforms in seconds (default is 3).
228
  batch_size : int, optional
229
  Number of audio samples per batch (default is 1).
 
238
 
239
  Yields
240
  ------
241
+ tuple (Tensor, Tensor)
242
  A tuple (batch_audios, batch_labels), where:
243
+ - batch_audios is a torch.tensor of processed audio waveforms.
244
+ - batch_labels is a torch.tensor of corresponding audio labels.
245
 
246
  Raises
247
  ------
 
318
  yield batch_audios_on_device, batch_labels_on_device
319
 
320
 
321
+ def apply_padding(
322
+ waveform: torch.Tensor,
323
+ output_size: int,
324
+ padding_method: Literal["zero", "reflect", "replicate", "circular"] = "zero",
325
+ ) -> torch.Tensor:
326
  """
327
  Applies padding to the waveform when its size is smaller than the desired output size.
328
 
 
355
 
356
  return F.pad(waveform.unsqueeze(0), (0, total_pad), mode=padding_method).squeeze()
357
  raise ValueError(f"Invalid padding method: {padding_method}")
358
+
359
+
360
class FastModelHuggingFace:
    """
    Class for loading a FastModel instance from the Hugging Face Hub.
    Includes preprocessing pipelines and a LightGBM model.

    Attributes
    ----------
    pipeline : object
        The unpickled preprocessing pipeline. `predict` reads its
        `audio_processing_params` mapping and calls its `get_features` method.
    model : lgb.Booster
        The LightGBM model instance used for predictions.

    Methods
    -------
    from_pretrained(repo_id: str, revision: str = "main",
                    pipeline_file_name: str = "pipeline.pkl",
                    model_file_name: str = "model_lightgbm.txt") -> "FastModelHuggingFace":
        Loads the FastModel pipeline and model from the Hugging Face Hub.
    predict(input_data: Union[str, "Dataset"], get_proba: bool = False) -> np.ndarray:
        Predicts labels or probabilities for a WAV file or dataset.
    """

    def __init__(self, pipeline: object, lightgbm_model: "lgb.Booster"):
        """
        Initializes a FastModelHuggingFace instance.

        Parameters
        ----------
        pipeline : object
            The serialized preprocessing pipeline.
        lightgbm_model : lgb.Booster
            A LightGBM booster model for predictions.
        """
        self.pipeline = pipeline
        self.model = lightgbm_model

    @classmethod
    def from_pretrained(
        cls,
        repo_id: str,
        revision: str = "main",
        pipeline_file_name: str = "pipeline.pkl",
        model_file_name: str = "model_lightgbm.txt",
    ) -> "FastModelHuggingFace":
        """
        Loads the FastModel pipeline and LightGBM model from the Hugging Face Hub.

        Parameters
        ----------
        repo_id : str
            The Hugging Face repository ID.
        revision : str, optional
            The specific revision of the repository to use (default is "main").
        pipeline_file_name : str, optional
            The filename of the serialized pipeline (default is "pipeline.pkl").
        model_file_name : str, optional
            The filename of the LightGBM model (default is "model_lightgbm.txt").

        Returns
        -------
        FastModelHuggingFace
            A FastModelHuggingFace instance with the loaded pipeline and model.

        Raises
        ------
        FileNotFoundError
            If either downloaded file path does not exist on disk.
        """
        # NOTE(review): `save_pipeline` writes the booster as "model_fast_model.txt"
        # by default, while this method looks for "model_lightgbm.txt" -- confirm
        # which name the Hub repository actually uses, or pass `model_file_name`.
        pipeline_path = hf_hub_download(repo_id, filename=pipeline_file_name, revision=revision)
        model_lgbm_path = hf_hub_download(repo_id, filename=model_file_name, revision=revision)

        if not os.path.exists(pipeline_path):
            raise FileNotFoundError(f"Pipeline file {pipeline_path} is missing or corrupted.")
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only call this on repositories (and revisions) you trust.
        with open(pipeline_path, "rb") as f:
            pipeline = pickle.load(f)

        if not os.path.exists(model_lgbm_path):
            raise FileNotFoundError(
                f"LightGBM model file {model_lgbm_path} is missing or corrupted."
            )
        lightgbm_model = lgb.Booster(model_file=model_lgbm_path)

        return cls(pipeline=pipeline, lightgbm_model=lightgbm_model)

    def predict(
        self,
        input_data: Union[str, "Dataset"],
        get_proba: bool = False,
        batch_size: int = 5000,
        device: Literal["cpu", "cuda"] = "cuda",
    ) -> np.ndarray:
        """
        Predicts labels or probabilities for a given audio input.

        Parameters
        ----------
        input_data : Union[str, Dataset]
            The input for prediction, either the path to a WAV file or a Hugging Face dataset
            (detected by the presence of a `_data` attribute).
        get_proba : bool, optional
            If True, returns class probabilities instead of binary predictions (default is False).
        batch_size : int, optional
            Number of audio samples per batch (default is 5000). Only used for dataset input.
        device : Literal["cpu", "cuda"]
            Device on which the audio is placed before feature extraction (default is "cuda").
            "cuda" silently falls back to "cpu" when CUDA is not available.

        Returns
        -------
        np.ndarray
            If `get_proba` is True, returns an array of probabilities.
            If `get_proba` is False, returns binary predictions (threshold 0.5).

        Raises
        ------
        ValueError
            If the input data type is neither a WAV file path string nor a Hugging Face dataset.
        """
        # The default is "cuda"; without this fallback the call crashes on
        # CPU-only machines as soon as a tensor is moved to the device.
        if device == "cuda" and not torch.cuda.is_available():
            device = "cpu"

        if isinstance(input_data, str):
            params = self.pipeline.audio_processing_params
            audio_waveform, sr = torchaudio.load(input_data)
            audio_waveform = audio_waveform.mean(dim=0)  # down-mix channels to mono
            if sr != params["sample_rate"]:
                resampler = torchaudio.transforms.Resample(
                    orig_freq=sr, new_freq=params["sample_rate"]
                )
                audio_waveform = resampler(audio_waveform)
            # NOTE(review): unlike the dataset branch, the configured "duration" and
            # "padding_method" are not applied here -- confirm that files of other
            # lengths yield features compatible with the trained model.
            features = self.pipeline.get_features(audio_waveform.unsqueeze(0).to(device))

        elif hasattr(input_data, "_data"):
            params = self.pipeline.audio_processing_params
            # `batch_audio_loader` is a module-level function, not a pipeline
            # attribute; a loader exposed by the pipeline is still preferred if present.
            loader = getattr(self.pipeline, "batch_audio_loader", None) or batch_audio_loader
            batches = [
                self.pipeline.get_features(batch_audios)
                for batch_audios, _ in loader(
                    input_data,
                    waveform_duration=params["duration"],
                    batch_size=batch_size,
                    padding_method=params["padding_method"],
                    device=device,
                )
            ]
            features = torch.cat(batches, dim=0)

        else:
            raise ValueError("Input must be either a path to a WAV file or a Hugging Face Dataset.")

        predictions = self.model.predict(features.cpu().numpy())
        return predictions if get_proba else (predictions >= 0.5).astype(int)
503
+
504
+
505
def save_pipeline(
    model_class_instance: "FastModel",
    path: Union[str, Path],
    lgbm_file_name: Union[str, None] = None,
    pipeline_file_name: Union[str, None] = None,
):
    """
    Serializes the complete FastModel instance for saving.

    Two files are written into `path`: the LightGBM booster in its native text
    format (only when the instance holds a trained model) and a pickle of the
    whole instance.

    Parameters
    ----------
    model_class_instance : FastModel
        The trained FastModel instance to serialize. It must expose a `model`
        attribute (the LightGBM booster, or None when not fitted).
    path : str or Path
        The directory to save the FastModel instance. Created if missing.
    lgbm_file_name : str, optional
        The filename for saving the LightGBM model (default is "model_fast_model.txt").
    pipeline_file_name : str, optional
        The filename for saving the pipeline (default is "pipeline.pkl").
    """
    lgbm_file_name = lgbm_file_name or "model_fast_model.txt"
    pipeline_file_name = pipeline_file_name or "pipeline.pkl"

    # Make sure the target directory exists so neither write fails on a fresh path.
    output_dir = Path(path)
    output_dir.mkdir(parents=True, exist_ok=True)

    if model_class_instance.model is not None:
        # The location is recorded on the instance before pickling, so the
        # pickle also carries the path of the booster file written here.
        model_class_instance.model_file_name = str(output_dir / lgbm_file_name)
        model_class_instance.model.save_model(model_class_instance.model_file_name)

    with open(output_dir / pipeline_file_name, "wb") as f:
        pickle.dump(model_class_instance, f)
536
+
537
+
538
def load_pipeline(
    path: Union[str, Path],
    lgbm_file_name: Union[str, None] = None,
    pipeline_file_name: Union[str, None] = None,
) -> "FastModel":
    """
    Loads a serialized pipeline and LightGBM model.

    Parameters
    ----------
    path : str or Path
        The directory containing the serialized FastModel.
    lgbm_file_name : str, optional
        The filename for the LightGBM model (default is "model_fast_model.txt").
    pipeline_file_name : str, optional
        The filename for the pipeline (default is "pipeline.pkl").

    Returns
    -------
    FastModel
        The unpickled instance, with its `model` attribute replaced by a booster
        loaded from the LightGBM file.

    Raises
    ------
    FileNotFoundError
        If either the LightGBM model or pipeline file is not found.
    """
    lgbm_file_name = lgbm_file_name or "model_fast_model.txt"
    pipeline_file_name = pipeline_file_name or "pipeline.pkl"

    base_dir = Path(path)
    pipeline_path = base_dir / pipeline_file_name
    lightgbm_path = base_dir / lgbm_file_name

    # Validate both files before unpickling anything, so a missing booster is
    # reported without first executing the pickle.
    if not pipeline_path.exists():
        raise FileNotFoundError(f"Pipeline file {pipeline_path} not found.")
    if not lightgbm_path.exists():
        raise FileNotFoundError(f"LightGBM file {lightgbm_path} not found.")

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load pipelines that come from a trusted source.
    with open(pipeline_path, "rb") as f:
        model_class_instance = pickle.load(f)

    model_class_instance.model = lgb.Booster(model_file=str(lightgbm_path))

    return model_class_instance
model/features.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "n_fft": 512,
3
- "hop_length": 256,
4
- "pad": 0,
5
- "win_spectrogram": "Hamming Window",
6
- "win_cepstral": "Hamming Window",
7
- "power": 2,
8
- "pad_mode": "reflect",
9
- "f_min": 70,
10
- "f_max": 1525,
11
- "fc_min": 0.05,
12
- "fc_max": 0.8,
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/lgbm_params.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "objective": "binary",
3
- "metric": "binary_logloss",
4
- "boosting_type": "gbdt",
5
- "learning_rate": 0.1,
6
- "num_leaves": 75,
7
- "max_depth": -1,
8
- "feature_fraction": 0.8,
9
- "bagging_fraction": 0.8,
10
- "bagging_freq": 5,
11
- "verbosity": -1,
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
model/model.txt → model_fast_model.txt RENAMED
File without changes
pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3243c0fd7f6cafa8492132711b0376da91838029cfe1362e2fc19ee6bf847894
3
+ size 834063