File size: 9,664 Bytes
4d6e8c2
fe4a4cb
 
 
 
3b09640
fe4a4cb
4d6e8c2
fe4a4cb
4d6e8c2
4768d6b
3b09640
 
4d6e8c2
 
70f5f26
1c33274
70f5f26
fe4a4cb
8169d28
3b09640
1c33274
70f5f26
4d6e8c2
b321cd2
8169d28
 
 
 
 
 
 
 
 
b321cd2
fe4a4cb
4d6e8c2
fe4a4cb
 
 
 
 
 
 
3b09640
4768d6b
3b09640
fe4a4cb
1431ab9
 
fe4a4cb
 
 
 
 
 
 
 
 
b321cd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d87ee35
b321cd2
 
d87ee35
b321cd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f19a289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ac3d53
2a198b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe4a4cb
2a198b3
 
576ff5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# FastAPI router exposing the audio-evaluation endpoint for this Space.
from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import random
import os

from .utils.evaluation import AudioEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info

# Load HF_TOKEN (and any other secrets) from a local .env file, if present.
from dotenv import load_dotenv 
load_dotenv()

router = APIRouter()

# Shown in the generated OpenAPI docs for the endpoint below.
DESCRIPTION = "Random Baseline"
ROUTE = "/audio"


# NOTE(review): duplicate import — accuracy_score is already imported above.
from sklearn.metrics import accuracy_score

@router.post(ROUTE, tags=["Audio Task"],
             description=DESCRIPTION)
async def evaluate_audio(request: AudioEvaluationRequest):
    """Evaluate audio-classification predictions for the chainsaw task.

    Loads the (gated) dataset named in the request, starts emissions
    tracking around the inference section, and computes accuracy of the
    predicted labels against the test split's numeric labels.

    NOTE(review): in the original source these statements were out of
    order — `predictions` and `test_dataset` were used before the dataset
    was loaded — and the token was read from os.getenv("RYmV25") even
    though the comment (and Hugging Face convention) says HF_TOKEN. Both
    are fixed here. `predictions` and `map_predictions_to_labels` are
    still expected to be provided by the inference code, which currently
    lives at module level further down in this file — TODO: move it into
    the tracked section of this handler.

    Args:
        request: AudioEvaluationRequest with dataset_name/test_size/test_seed.
    """
    # Define the label mapping (kept for reference; dataset labels are
    # already numeric).
    LABEL_MAPPING = {
        "chainsaw": 0,
        "environment": 1
    }

    # The dataset is gated: authenticate with the HF_TOKEN env variable.
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Split dataset
    train_test = dataset["train"]
    test_dataset = dataset["test"]

    # Start tracking emissions around the inference pass.
    tracker.start()
    tracker.start_task("inference")

    # Map string predictions to numeric labels.
    # NOTE(review): `predictions` must be produced by the model-inference
    # code before this point in a working build.
    numeric_predictions = map_predictions_to_labels(predictions)

    # Extract true labels (already numeric)
    true_labels = test_dataset["label"]

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, numeric_predictions)
    print("Accuracy:", accuracy)

    # Get space info for the results payload.
    username, space_url = get_space_info()
# NOTE(review): mid-file imports and model loading; `os` is re-imported
# (already imported at the top of the file).
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import os

# Load the YAMNet model from TensorFlow Hub (network access at import time).
yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
yamnet = hub.load(yamnet_model_url)

# Download YAMNet class map CSV file (also a network call at import time).
labels_path = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"
labels = tf.keras.utils.get_file("yamnet_class_map.csv", labels_path)

# Read class names from the downloaded CSV
def load_class_names(csv_file_path):
    """Read YAMNet class display names from its class-map CSV.

    Uses the csv module so quoted display names containing commas
    (e.g. "Dog, bark" in yamnet_class_map.csv) are parsed as one field —
    the original `line.split(",")[-1]` truncated such names to the text
    after the last comma.

    Args:
        csv_file_path: path to yamnet_class_map.csv (index,mid,display_name).

    Returns:
        list of str: display names (last CSV column) in class-index order.
    """
    import csv  # local import keeps this block self-contained

    class_names = []
    with open(csv_file_path, "r", newline="") as file:
        reader = csv.reader(file)
        next(reader)  # Skip the header row
        for row in reader:
            if row:  # tolerate blank lines
                class_names.append(row[-1])
    return class_names

yamnet_classes = load_class_names(labels)

# Define a function for YAMNet inference
def yamnet_inference(file_name):
    """Run YAMNet on an audio file and return its mean class scores.

    Loads the file at 16 kHz (YAMNet's expected sample rate), peak-
    normalizes it, and averages the per-frame class scores over time.

    Args:
        file_name: path to an audio file readable by librosa.

    Returns:
        1-D NumPy array of averaged class scores, or None on error.
    """
    try:
        # Load the audio file and resample to 16kHz (YAMNet's expected sample rate)
        waveform, sample_rate = librosa.load(file_name, sr=16000)

        # Peak-normalize. Guard against silent or empty audio: the original
        # divided by max(|waveform|) unconditionally, producing NaNs (0/0)
        # for an all-zero signal.
        peak = np.max(np.abs(waveform)) if waveform.size else 0.0
        if peak > 0:
            waveform = waveform / peak

        # Convert to tensor
        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

        # Predict the scores and embeddings from YAMNet
        scores, embeddings, spectrogram = yamnet(waveform)

        # Average the scores across time frames to get a single prediction
        # for the entire audio clip.
        prediction = tf.reduce_mean(scores, axis=0).numpy()

        return prediction
    except Exception as e:
        # Best-effort: report and signal failure with None rather than raise.
        print(f"Error processing file {file_name}: {e}")
        return None

# Function to map predictions to class names
def get_top_class(predictions):
    """Return the YAMNet class name with the highest score.

    Args:
        predictions: array of class scores, or None on a failed inference.

    Returns:
        str: the top class's display name; "Error" when predictions is
        None; "Unknown" when the argmax falls outside the class list.
    """
    if predictions is None:
        return "Error"
    best = np.argmax(predictions)  # index of the highest-scoring class
    if best < len(yamnet_classes):
        return yamnet_classes[best]
    return "Unknown"

# NOTE(review): tf, hub and numpy are re-imported here; train_test_split and
# DatasetDict are imported but not used in this section of the file.
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from datasets import DatasetDict

# Load YAMNet Model
# NOTE(review): second load of the same TF-Hub URL (`yamnet` above already
# holds this model) — redundant work at import time.
yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_url)

# Function to extract embeddings from audio
def extract_embedding(audio_example):
    """Compute YAMNet embeddings for one dataset example.

    Args:
        audio_example: dataset row whose audio["array"] holds the waveform.

    Returns:
        dict with key "embedding": the per-frame YAMNet embeddings as a
        NumPy array.
    """
    raw_waveform = audio_example["audio"]["array"]
    waveform_tensor = tf.convert_to_tensor(raw_waveform, dtype=tf.float32)

    # Run YAMNet; only the embeddings output is kept.
    _scores, frame_embeddings, _spectrogram = yamnet_model(waveform_tensor)

    return {"embedding": frame_embeddings.numpy()}

# Apply embedding extraction to training data
# NOTE(review): `dataset` is only assigned inside evaluate_audio(); at module
# scope this raises NameError on import — this section presumably belongs
# inside the handler's tracked inference pass. TODO confirm intended placement.
train_embeddings = dataset["train"].map(extract_embedding)

# Apply embedding extraction to testing data
test_embeddings = dataset["test"].map(extract_embedding)

# Flatten per-frame embeddings: each clip contributes one row per YAMNet
# frame, all rows sharing the clip's label.
X_train, y_train = [], []
X_test, y_test = [], []

# Process Training Data
for example in train_embeddings:
    for embedding in example["embedding"]:
        X_train.append(embedding)
        y_train.append(example["label"])

# Process Testing Data
for example in test_embeddings:
    for embedding in example["embedding"]:
        X_test.append(embedding)
        y_test.append(example["label"])

# Convert to NumPy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Convert labels to categorical (one-hot encoding)
y_train_cat = to_categorical(y_train, num_classes=2)
y_test_cat = to_categorical(y_test, num_classes=2)

print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the model: a small MLP classifier head over YAMNet embeddings
# (transfer learning; YAMNet itself is not fine-tuned).
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
])

model.summary()

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on YAMNet embeddings
# NOTE(review): the test split doubles as validation data here — consider a
# held-out validation split to avoid tuning on the test set.
model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))

# Evaluate the model
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred_labels)
print("Transfer Learning Model Accuracy:", accuracy)

# Predict labels for the test dataset
# Run YAMNet inference on the raw audio data
# NOTE(review): `test_dataset` is only assigned inside evaluate_audio(); this
# loop cannot run at module scope — it presumably belongs inside the handler's
# tracked inference pass. TODO confirm intended placement.
predictions = []

for audio_data in test_dataset["audio"]:
    # Extract waveform and sampling rate
    waveform = audio_data["array"]
    sample_rate = audio_data["sampling_rate"]

    # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
    if sample_rate != 16000:
        waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

    # Convert waveform to tensor
    waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # Ensure waveform is 1D
    waveform = tf.squeeze(waveform)

    # Predict with YAMNet--->model
    # Get YAMNet embeddings
    _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction

    # Calculate the mean of the embeddings across the time dimension
    embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames

    # Reshape embeddings for prediction
    embeddings = embeddings.numpy()  # Convert to NumPy array
    embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)

    # Now predict using your trained model
    scores = model.predict(embeddings)

    # Get predicted class
    predicted_class_index = np.argmax(scores)
    predicted_class_label = predicted_class_index  # Assuming 0 for 'chainsaw', 1 for 'environment'

    # Get the top class name using the predicted label
    top_class = "chainsaw" if predicted_class_label == 0 else "environment"
    predictions.append(top_class)

print("Predictions:", predictions)

def map_predictions_to_labels(predictions):
    """
    Maps string predictions to numeric labels:
    - "chainsaw" -> 0
    - any other class -> 1
    Args:
        predictions (list of str): List of class name predictions.
    Returns:
        list of int: Mapped numeric labels.
    """
    numeric_labels = []
    for class_name in predictions:
        numeric_labels.append(0 if class_name == "chainsaw" else 1)
    return numeric_labels

# NOTE(review): third import of accuracy_score in this file, and this whole
# section duplicates the accuracy computation inside evaluate_audio().
from sklearn.metrics import accuracy_score

# Map string predictions to numeric labels
numeric_predictions = map_predictions_to_labels(predictions)

# Extract true labels (already numeric)
# NOTE(review): `test_dataset` is handler-local — see note on the loop above.
true_labels = test_dataset["label"]

# Calculate accuracy
accuracy = accuracy_score(true_labels, numeric_predictions)
print("Accuracy:", accuracy)

#--------------------------------------------------------------------------------------------
# YOUR MODEL INFERENCE STOPS HERE
#--------------------------------------------------------------------------------------------  

# Stop tracking emissions
emissions_data = tracker.stop_task()

# Prepare results dictionary
# NOTE(review): `username`, `space_url`, `accuracy` and `request` are all
# local to evaluate_audio(); in the standard Space template these lines end
# the handler and the handler returns `results` — here they sit at module
# scope and the results are only printed, never returned. TODO restore the
# `return results` inside the handler.
results = {
    "username": username,
    "space_url": space_url,
    "submission_timestamp": datetime.now().isoformat(),
    "model_description": DESCRIPTION,
    "accuracy": float(accuracy),
    "energy_consumed_wh": emissions_data.energy_consumed * 1000,
    "emissions_gco2eq": emissions_data.emissions * 1000,
    "emissions_data": clean_emissions_data(emissions_data),
    "api_route": ROUTE,
    "dataset_config": {
        "dataset_name": request.dataset_name,
        "test_size": request.test_size,
        "test_seed": request.test_seed
    }
}

print(results)