ImenMourali committed on
Commit adf98b6 · verified · 1 Parent(s): 398ac10

Update tasks/audio.py

Files changed (1): tasks/audio.py (+140 -223)
tasks/audio.py CHANGED
@@ -16,276 +16,193 @@ router = APIRouter()
 DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"

-
-from sklearn.metrics import accuracy_score
-
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
-
-    # Map string predictions to numeric labels
-    numeric_predictions = map_predictions_to_labels(predictions)
-
-    # Extract true labels (already numeric)
-    true_labels = test_dataset["label"]
-
-    # Calculate accuracy
-    accuracy = accuracy_score(true_labels, numeric_predictions)
-    print("Accuracy:", accuracy)
-
-    # Get space info
-    username, space_url = get_space_info()
-
-    # Define the label mapping
-    LABEL_MAPPING = {
-        "chainsaw": 0,
-        "environment": 1
-    }
     # Load and prepare the dataset
     # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
-    dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
-
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["test"]
-
     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")
-
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    import tensorflow as tf
-    import tensorflow_hub as hub
-    import librosa
-    import numpy as np
-    import os
-
-    # Load the YAMNet model from TensorFlow Hub
-    yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
-    yamnet = hub.load(yamnet_model_url)
-
-    # Download YAMNet class map CSV file
-    labels_path = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"
-    labels = tf.keras.utils.get_file("yamnet_class_map.csv", labels_path)
-
-    # Read class names from the downloaded CSV
-    def load_class_names(csv_file_path):
-        class_names = []
-        with open(csv_file_path, "r") as file:
-            next(file)  # Skip the header
-            for line in file:
-                class_names.append(line.strip().split(",")[-1])  # Get the class name from the last column
-        return class_names
-
-    yamnet_classes = load_class_names(labels)
-
-    # Define a function for YAMNet inference
-    def yamnet_inference(file_name):
-        try:
-            # Load the audio file and resample to 16kHz (YAMNet's expected sample rate)
-            waveform, sample_rate = librosa.load(file_name, sr=16000)
-
-            # Normalize audio data
-            waveform = waveform / np.max(np.abs(waveform))
-
-            # Convert to tensor
             waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

-            # Predict the scores and embeddings from YAMNet
-            scores, embeddings, spectrogram = yamnet(waveform)
-
-            # Average the scores across time frames to get a single prediction for the entire audio
-            prediction = tf.reduce_mean(scores, axis=0).numpy()
-
-            return prediction
-        except Exception as e:
-            print(f"Error processing file {file_name}: {e}")
-            return None
-
-    # Function to map predictions to class names
-    def get_top_class(predictions):
-        if predictions is None:
-            return "Error"
-        top_class = np.argmax(predictions)  # Get the index of the class with the highest score
-        return yamnet_classes[top_class] if top_class < len(yamnet_classes) else "Unknown"

-    import tensorflow as tf
-    import tensorflow_hub as hub
-    import numpy as np
-    from sklearn.model_selection import train_test_split
-    from tensorflow.keras.utils import to_categorical
-    from datasets import DatasetDict

-    # Load YAMNet Model
-    yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
-    yamnet_model = hub.load(yamnet_model_url)

-    # Function to extract embeddings from audio
-    def extract_embedding(audio_example):
-        '''Extract YAMNet embeddings from a waveform'''
-        # Convert the audio example to a NumPy array
-        waveform = audio_example["audio"]["array"]  # Ensure correct key reference
-        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

-        # Run YAMNet model
-        scores, embeddings, spectrogram = yamnet_model(waveform)

-        return {"embedding": embeddings.numpy()}

-    # Apply embedding extraction to training data
-    train_embeddings = dataset["train"].map(extract_embedding)

-    # Apply embedding extraction to testing data
-    test_embeddings = dataset["test"].map(extract_embedding)

-    X_train, y_train = [], []
-    X_test, y_test = [], []

-    # Process Training Data
-    for example in train_embeddings:
-        for embedding in example["embedding"]:
-            X_train.append(embedding)
-            y_train.append(example["label"])

-    # Process Testing Data
-    for example in test_embeddings:
-        for embedding in example["embedding"]:
-            X_test.append(embedding)
-            y_test.append(example["label"])

-    # Convert to NumPy arrays
-    X_train = np.array(X_train)
-    y_train = np.array(y_train)
-    X_test = np.array(X_test)
-    y_test = np.array(y_test)

-    # Convert labels to categorical (one-hot encoding)
-    y_train_cat = to_categorical(y_train, num_classes=2)
-    y_test_cat = to_categorical(y_test, num_classes=2)

-    print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")

-    from tensorflow.keras.models import Sequential
-    from tensorflow.keras.layers import Dense, Dropout

-    # Define the model
-    model = Sequential([
-        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
-        Dropout(0.3),
-        Dense(64, activation='relu'),
-        Dropout(0.3),
-        Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
-    ])

-    model.summary()
-
-    # Compile the model
-    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
-
-    # Train the model on YAMNet embeddings
-    model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
-
-    # Evaluate the model
-    y_pred = model.predict(X_test)
-    y_pred_labels = np.argmax(y_pred, axis=1)
-
-    from sklearn.metrics import accuracy_score
-    accuracy = accuracy_score(y_test, y_pred_labels)
-    print("Transfer Learning Model Accuracy:", accuracy)

-    # Predict labels for the test dataset
-    # Run YAMNet inference on the raw audio data
-    predictions = []

-    for audio_data in test_dataset["audio"]:
-        # Extract waveform and sampling rate
-        waveform = audio_data["array"]
-        sample_rate = audio_data["sampling_rate"]

-        # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
-        if sample_rate != 16000:
-            waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

-        # Convert waveform to tensor
-        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

-        # Ensure waveform is 1D
-        waveform = tf.squeeze(waveform)

-        # Predict with YAMNet--->model
-        # Get YAMNet embeddings
-        _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction

-        # Calculate the mean of the embeddings across the time dimension
-        embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames

-        # Reshape embeddings for prediction
-        embeddings = embeddings.numpy()  # Convert to NumPy array
-        embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)

-        # Now predict using your trained model
-        scores = model.predict(embeddings)

-        # Get predicted class
-        predicted_class_index = np.argmax(scores)
-        predicted_class_label = predicted_class_index  # Assuming 0 for 'chainsaw', 1 for 'environment'

-        # Get the top class name using the predicted label
-        top_class = "chainsaw" if predicted_class_label == 0 else "environment"
-        predictions.append(top_class)

-    print("Predictions:", predictions)

-    def map_predictions_to_labels(predictions):
-        """
-        Maps string predictions to numeric labels:
-        - "chainsaw" -> 0
-        - any other class -> 1
-        Args:
-            predictions (list of str): List of class name predictions.
-        Returns:
-            list of int: Mapped numeric labels.
-        """
-        return [0 if pred == "chainsaw" else 1 for pred in predictions]

-    from sklearn.metrics import accuracy_score

-    # Map string predictions to numeric labels
-    numeric_predictions = map_predictions_to_labels(predictions)
-
-    # Extract true labels (already numeric)
-    true_labels = test_dataset["label"]
-
-    # Calculate accuracy
-    accuracy = accuracy_score(true_labels, numeric_predictions)
-    print("Accuracy:", accuracy)
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE STOPS HERE
-    #--------------------------------------------------------------------------------------------
-
-    # Stop tracking emissions
-    emissions_data = tracker.stop_task()
-
-    # Prepare results dictionary
-    results = {
-        "username": username,
-        "space_url": space_url,
-        "submission_timestamp": datetime.now().isoformat(),
-        "model_description": DESCRIPTION,
-        "accuracy": float(accuracy),
-        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
-        "emissions_gco2eq": emissions_data.emissions * 1000,
-        "emissions_data": clean_emissions_data(emissions_data),
-        "api_route": ROUTE,
-        "dataset_config": {
-            "dataset_name": request.dataset_name,
-            "test_size": request.test_size,
-            "test_seed": request.test_seed
         }
-    }

-    print(results)
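Both the deleted block above and the added block below average YAMNet's per-frame embeddings into a single vector per clip before calling model.predict. A minimal shape-check sketch (not part of this commit; it assumes tensorflow and tensorflow_hub are installed and that the TF Hub model can be downloaded):

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

yamnet = hub.load("https://tfhub.dev/google/yamnet/1")

# Three seconds of synthetic 16 kHz audio stands in for a real clip.
waveform = tf.constant(np.random.uniform(-1.0, 1.0, 16000 * 3), dtype=tf.float32)

_, embeddings, _ = yamnet(waveform)
print(embeddings.shape)  # (num_frames, 1024): one 1024-dim embedding per ~0.48 s frame

clip_vector = tf.reduce_mean(embeddings, axis=0).numpy().reshape(1, -1)
print(clip_vector.shape)  # (1, 1024): the shape the Dense classifier receives per clip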
 
 DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"

 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
     # Load and prepare the dataset
     # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
+    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
+
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["test"]
+
     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")
+
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+    import tensorflow as tf
+    import tensorflow_hub as hub
+    import librosa
+    import numpy as np
+    from sklearn.model_selection import train_test_split
+    from tensorflow.keras.utils import to_categorical
+
+    # Load YAMNet Model
+    yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
+    yamnet_model = hub.load(yamnet_model_url)
+
+    # Function to extract embeddings from audio
+    def extract_embedding(audio_example):
+        '''Extract YAMNet embeddings from a waveform'''
+        waveform = audio_example["audio"]["array"]  # Ensure correct key reference
         waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
+        scores, embeddings, spectrogram = yamnet_model(waveform)
+        return {"embedding": embeddings.numpy()}

+    # Apply embedding extraction to training data
+    train_embeddings = dataset["train"].map(extract_embedding)

+    # Apply embedding extraction to testing data
+    test_embeddings = dataset["test"].map(extract_embedding)

+    X_train, y_train = [], []
+    X_test, y_test = [], []

+    # Process Training Data
+    for example in train_embeddings:
+        for embedding in example["embedding"]:
+            X_train.append(embedding)
+            y_train.append(example["label"])

+    # Process Testing Data
+    for example in test_embeddings:
+        for embedding in example["embedding"]:
+            X_test.append(embedding)
+            y_test.append(example["label"])

+    # Convert to NumPy arrays
+    X_train = np.array(X_train)
+    y_train = np.array(y_train)
+    X_test = np.array(X_test)
+    y_test = np.array(y_test)

+    # Convert labels to categorical (one-hot encoding)
+    y_train_cat = to_categorical(y_train, num_classes=2)
+    y_test_cat = to_categorical(y_test, num_classes=2)

+    print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")

+    from tensorflow.keras.models import Sequential
+    from tensorflow.keras.layers import Dense, Dropout

+    # Define the model
+    model = Sequential([
+        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
+        Dropout(0.3),
+        Dense(64, activation='relu'),
+        Dropout(0.3),
+        Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
+    ])

+    model.summary()

+    # Compile the model
+    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

+    # Train the model on YAMNet embeddings
+    model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))

+    # Evaluate the model
+    y_pred = model.predict(X_test)
+    y_pred_labels = np.argmax(y_pred, axis=1)

+    from sklearn.metrics import accuracy_score
+    accuracy = accuracy_score(y_test, y_pred_labels)
+    print("Transfer Learning Model Accuracy:", accuracy)

+    # Predict labels for the test dataset
+    # Run YAMNet inference on the raw audio data
+    predictions = []

+    for audio_data in test_dataset["audio"]:
+        # Extract waveform and sampling rate
+        waveform = audio_data["array"]
+        sample_rate = audio_data["sampling_rate"]

+        # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
+        if sample_rate != 16000:
+            waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

+        # Convert waveform to tensor
+        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

+        # Ensure waveform is 1D
+        waveform = tf.squeeze(waveform)

+        # Predict with YAMNet--->model
+        # Get YAMNet embeddings
+        _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction

+        # Calculate the mean of the embeddings across the time dimension
+        embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames

+        # Reshape embeddings for prediction
+        embeddings = embeddings.numpy()  # Convert to NumPy array
+        embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)

+        # Now predict using your trained model
+        scores = model.predict(embeddings)

+        # Get predicted class
+        predicted_class_index = np.argmax(scores)
+        predicted_class_label = predicted_class_index  # Assuming 0 for 'chainsaw', 1 for 'environment'

+        # Get the top class name using the predicted label
+        top_class = "chainsaw" if predicted_class_label == 0 else "environment"
+        predictions.append(top_class)

+    print("Predictions:", predictions)

+    def map_predictions_to_labels(predictions):
+        """
+        Maps string predictions to numeric labels:
+        - "chainsaw" -> 0
+        - any other class -> 1
+        Args:
+            predictions (list of str): List of class name predictions.
+        Returns:
+            list of int: Mapped numeric labels.
+        """
+        return [0 if pred == "chainsaw" else 1 for pred in predictions]

+    # Map string predictions to numeric labels
+    numeric_predictions = map_predictions_to_labels(predictions)

+    # Extract true labels (already numeric)
+    true_labels = test_dataset["label"]

+    # Calculate accuracy
+    accuracy = accuracy_score(true_labels, numeric_predictions)
+    print("Accuracy:", accuracy)

+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE STOPS HERE
+    #--------------------------------------------------------------------------------------------
+
+    # Stop tracking emissions
+    emissions_data = tracker.stop_task()
+
+    # Prepare results dictionary
+    results = {
+        "username": username,
+        "space_url": space_url,
+        "submission_timestamp": datetime.now().isoformat(),
+        "model_description": DESCRIPTION,
+        "accuracy": float(accuracy),
+        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
+        "emissions_gco2eq": emissions_data.emissions * 1000,
+        "emissions_data": clean_emissions_data(emissions_data),
+        "api_route": ROUTE,
+        "dataset_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed
+        }
     }

+    print(results)
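For reference, a hypothetical client call against the updated route. The payload fields are inferred from the handler above (dataset_name, test_size, test_seed); the host, port, and dataset id are placeholders, and the real AudioEvaluationRequest schema may include additional fields:

import requests

payload = {
    "dataset_name": "some-org/frugal-audio-dataset",  # placeholder id for the gated dataset
    "test_size": 0.2,
    "test_seed": 42,
}

response = requests.post("http://localhost:8000/audio", json=payload)
# If the handler returns the results dict, expected keys include
# accuracy, energy_consumed_wh, emissions_gco2eq, and dataset_config.
print(response.json())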