ImenMourali committed on
Commit
b321cd2
·
verified ·
1 Parent(s): 0ae53cb

Update tasks/audio.py

Browse files
Files changed (1) hide show
  1. tasks/audio.py +205 -11
tasks/audio.py CHANGED
@@ -21,13 +21,18 @@ ROUTE = "/audio"
21
  @router.post(ROUTE, tags=["Audio Task"],
22
  description=DESCRIPTION)
23
  async def evaluate_audio(request: AudioEvaluationRequest):
24
- """
25
- Evaluate audio classification for rainforest sound detection.
26
-
27
- Current Model: Random Baseline
28
- - Makes random predictions from the label space (0-1)
29
- - Used as a baseline for comparison
30
- """
 
 
 
 
 
31
  # Get space info
32
  username, space_url = get_space_info()
33
 
@@ -52,10 +57,199 @@ async def evaluate_audio(request: AudioEvaluationRequest):
52
  # YOUR MODEL INFERENCE CODE HERE
53
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
54
  #--------------------------------------------------------------------------------------------
55
-
56
- # Make random predictions (placeholder for actual model inference)
57
- true_labels = test_dataset["label"]
58
- predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  #--------------------------------------------------------------------------------------------
61
  # YOUR MODEL INFERENCE STOPS HERE
 
21
  @router.post(ROUTE, tags=["Audio Task"],
22
  description=DESCRIPTION)
23
  async def evaluate_audio(request: AudioEvaluationRequest):
24
+ from sklearn.metrics import accuracy_score
25
+
26
+ # Map string predictions to numeric labels
27
+ numeric_predictions = map_predictions_to_labels(predictions)
28
+
29
+ # Extract true labels (already numeric)
30
+ true_labels = test_dataset["label"]
31
+
32
+ # Calculate accuracy
33
+ accuracy = accuracy_score(true_labels, numeric_predictions)
34
+ print("Accuracy:", accuracy)
35
+
36
  # Get space info
37
  username, space_url = get_space_info()
38
 
 
57
  # YOUR MODEL INFERENCE CODE HERE
58
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
59
  #--------------------------------------------------------------------------------------------
60
+ import tensorflow as tf
61
+ import tensorflow_hub as hub
62
+ import librosa
63
+ import numpy as np
64
+ import os
65
+
66
+ # Load the YAMNet model from TensorFlow Hub
67
+ yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
68
+ yamnet = hub.load(yamnet_model_url)
69
+
70
+ # Download YAMNet class map CSV file
71
+ labels_path = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"
72
+ labels = tf.keras.utils.get_file("yamnet_class_map.csv", labels_path)
73
+
74
+ # Read class names from the downloaded CSV
75
def load_class_names(csv_file_path):
    """Read class display names from a YAMNet-style class-map CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-blank row, the value of the last column is taken as the class name.

    The file is parsed with the ``csv`` module rather than a plain
    ``split(",")`` so that quoted names containing commas (for example
    ``"Child speech, kid speaking"``) are returned whole instead of being
    truncated to the text after the last comma.

    Args:
        csv_file_path: Path of the CSV file to read.

    Returns:
        list of str: Class names in file order. Empty if the file has no
        data rows.
    """
    import csv

    with open(csv_file_path, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header; tolerate a completely empty file
        # csv.reader yields [] for blank lines; those carry no class name.
        return [row[-1] for row in reader if row]
82
+
83
+ yamnet_classes = load_class_names(labels)
84
+
85
+ # Define a function for YAMNet inference
86
def yamnet_inference(file_name):
    """Run YAMNet on one audio file and return its averaged class scores.

    The file is loaded with librosa at 16 kHz (YAMNet's expected sample
    rate), peak-normalized, converted to a float32 tensor and passed to the
    ``yamnet`` model. The per-frame scores are averaged over axis 0 so the
    whole clip yields a single score vector.

    Args:
        file_name: Path of the audio file to analyse.

    Returns:
        The averaged score vector as a NumPy array, or ``None`` if loading
        or inference failed (the error is printed, not raised).
    """
    try:
        # Load the audio file and resample to 16kHz (YAMNet's expected sample rate)
        waveform, _ = librosa.load(file_name, sr=16000)

        # Peak-normalize, but leave an all-zero (silent) clip untouched:
        # dividing by a zero peak would fill the waveform with NaNs and
        # silently poison the scores.
        peak = np.max(np.abs(waveform))
        if peak > 0:
            waveform = waveform / peak

        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

        # Only the scores are needed here; embeddings and spectrogram are ignored.
        scores, _embeddings, _spectrogram = yamnet(waveform)

        # Average the scores across time frames to get a single prediction
        # for the entire audio
        return tf.reduce_mean(scores, axis=0).numpy()
    except Exception as e:
        # Deliberate best-effort: callers treat None as a failed clip.
        print(f"Error processing file {file_name}: {e}")
        return None
107
+
108
+ # Function to map predictions to class names
109
def get_top_class(predictions):
    """Translate a score vector into the name of its highest-scoring class.

    Args:
        predictions: Score vector, or ``None`` when inference failed.

    Returns:
        str: ``"Error"`` when ``predictions`` is ``None``; otherwise the
        entry of ``yamnet_classes`` at the arg-max index, or ``"Unknown"``
        when that index lies beyond the end of ``yamnet_classes``.
    """
    if predictions is not None:
        best_index = np.argmax(predictions)  # Index of the highest score
        if best_index < len(yamnet_classes):
            return yamnet_classes[best_index]
        return "Unknown"
    return "Error"
114
+
115
+ import tensorflow as tf
116
+ import tensorflow_hub as hub
117
+ import numpy as np
118
+ from sklearn.model_selection import train_test_split
119
+ from tensorflow.keras.utils import to_categorical
120
+ from datasets import DatasetDict
121
+
122
# Reuse the YAMNet instance that was already loaded into `yamnet` above from
# this same URL. Calling hub.load a second time would deserialize a second
# copy of the identical model, costing extra time and memory for no benefit.
yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
yamnet_model = yamnet
125
+
126
+ # Function to extract embeddings from audio
127
def extract_embedding(audio_example):
    '''Extract YAMNet embeddings from one dataset example.

    The waveform is prepared the same way as in the prediction loop further
    down (resampled to 16 kHz when the clip's sampling rate differs, converted
    to a float32 tensor, squeezed) so that the classifier is trained on
    embeddings computed from the same kind of input it later sees at
    prediction time.

    Args:
        audio_example: A dataset row whose ``"audio"`` entry provides
            ``"array"`` (the waveform) and ``"sampling_rate"``.

    Returns:
        dict: ``{"embedding": ...}`` holding the embeddings returned by
        ``yamnet_model`` for this clip as a NumPy array.
    '''
    import librosa  # Local import keeps this helper self-contained

    audio = audio_example["audio"]  # Ensure correct key reference
    waveform = np.asarray(audio["array"], dtype=np.float32)

    # YAMNet expects 16 kHz input; resample only when the clip differs.
    sample_rate = audio["sampling_rate"]
    if sample_rate != 16000:
        waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

    # Drop any singleton dimensions, as the prediction loop does.
    waveform = tf.squeeze(tf.convert_to_tensor(waveform, dtype=tf.float32))

    # Run YAMNet model; only the embeddings are used for transfer learning.
    _scores, embeddings, _spectrogram = yamnet_model(waveform)

    return {"embedding": embeddings.numpy()}
137
+
138
+ # Apply embedding extraction to training data
139
+ train_embeddings = train_test["train"].map(extract_embedding)
140
+
141
+ # Apply embedding extraction to testing data
142
+ test_embeddings = train_test["test"].map(extract_embedding)
143
+
144
+ X_train, y_train = [], []
145
+ X_test, y_test = [], []
146
+
147
+ # Process Training Data
148
+ for example in train_embeddings:
149
+ for embedding in example["embedding"]:
150
+ X_train.append(embedding)
151
+ y_train.append(example["label"])
152
+
153
+ # Process Testing Data
154
+ for example in test_embeddings:
155
+ for embedding in example["embedding"]:
156
+ X_test.append(embedding)
157
+ y_test.append(example["label"])
158
+
159
+ # Convert to NumPy arrays
160
+ X_train = np.array(X_train)
161
+ y_train = np.array(y_train)
162
+ X_test = np.array(X_test)
163
+ y_test = np.array(y_test)
164
+
165
+ # Convert labels to categorical (one-hot encoding)
166
+ y_train_cat = to_categorical(y_train, num_classes=2)
167
+ y_test_cat = to_categorical(y_test, num_classes=2)
168
+
169
+ print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")
170
+
171
+ from tensorflow.keras.models import Sequential
172
+ from tensorflow.keras.layers import Dense, Dropout
173
+
174
+ # Define the model
175
+ model = Sequential([
176
+ Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
177
+ Dropout(0.3),
178
+ Dense(64, activation='relu'),
179
+ Dropout(0.3),
180
+ Dense(2, activation='softmax') # 2 classes: chainsaw (0) vs. environment (1)
181
+ ])
182
+
183
+ model.summary()
184
+
185
+ # Compile the model
186
+ model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
187
+
188
+ # Train the model on YAMNet embeddings
189
+ model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
190
+
191
+ # Evaluate the model
192
+ y_pred = model.predict(X_test)
193
+ y_pred_labels = np.argmax(y_pred, axis=1)
194
+
195
+ from sklearn.metrics import accuracy_score
196
+ accuracy = accuracy_score(y_test, y_pred_labels)
197
+ print("Transfer Learning Model Accuracy:", accuracy)
198
+
199
+ # Predict labels for the test dataset
200
+ # Run YAMNet inference on the raw audio data
201
# Predict labels for the test dataset with the trained classifier.
#
# Each clip is reduced to one clip-level YAMNet embedding (mean over the time
# frames). The embeddings are collected first and classified with a SINGLE
# model.predict call, instead of one predict call per clip, which avoids
# paying Keras' per-call predict overhead for every sample.
target_sample_rate = 16000  # YAMNet's expected sample rate
clip_embeddings = []

for audio_data in test_dataset["audio"]:
    # Extract waveform and sampling rate
    waveform = audio_data["array"]
    sample_rate = audio_data["sampling_rate"]

    # Resample the waveform to 16kHz if necessary
    if sample_rate != target_sample_rate:
        waveform = librosa.resample(
            waveform, orig_sr=sample_rate, target_sr=target_sample_rate
        )

    # Convert to a float32 tensor and drop any singleton dimensions
    waveform = tf.squeeze(tf.convert_to_tensor(waveform, dtype=tf.float32))

    # Only the embeddings are needed; scores and spectrogram are ignored
    _, embeddings, _ = yamnet_model(waveform)

    # Average across time frames -> one embedding vector per clip
    clip_embeddings.append(tf.reduce_mean(embeddings, axis=0).numpy())

if clip_embeddings:
    # Shape (num_clips, embedding_dimension): one row per clip
    scores = model.predict(np.stack(clip_embeddings))
    predicted_indices = np.argmax(scores, axis=1)
    # Class index 0 is 'chainsaw', anything else is 'environment'
    predictions = [
        "chainsaw" if class_index == 0 else "environment"
        for class_index in predicted_indices
    ]
else:
    # Nothing to classify; np.stack would raise on an empty list
    predictions = []

print("Predictions:", predictions)
241
+
242
def map_predictions_to_labels(predictions):
    """
    Convert class-name predictions into numeric labels.

    A prediction equal to "chainsaw" becomes 0; every other value becomes 1.

    Args:
        predictions (list of str): Predicted class names.
    Returns:
        list of int: One numeric label per prediction, in the same order.
    """
    numeric_labels = []
    for class_name in predictions:
        if class_name == "chainsaw":
            numeric_labels.append(0)
        else:
            numeric_labels.append(1)
    return numeric_labels
253
 
254
  #--------------------------------------------------------------------------------------------
255
  # YOUR MODEL INFERENCE STOPS HERE