from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import os

from .utils.evaluation import AudioEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info

from dotenv import load_dotenv 
load_dotenv()
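# load_dotenv() reads a local .env file so that HF_TOKEN is available via os.getenv() below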

router = APIRouter()

DESCRIPTION = "YAMNet embeddings + dense classifier (transfer learning)"
ROUTE = "/audio"
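# Example request body (field names taken from their usage below; the exact schema is the
# AudioEvaluationRequest model in .utils.evaluation):
#   POST /audio
#   {"dataset_name": "<hf-dataset-id>", "test_size": 0.2, "test_seed": 42}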

@router.post(ROUTE, tags=["Audio Task"],
             description=DESCRIPTION)
async def evaluate_audio(request: AudioEvaluationRequest):
    # Load and prepare the dataset
    # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))

    # Select the train and test splits
    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    # The random baseline from the template has been replaced by YAMNet-based transfer
    # learning. Keep all model code inside this block so that its energy consumption and
    # emissions are tracked.
    #--------------------------------------------------------------------------------------------
    import tensorflow as tf
    import tensorflow_hub as hub
    import librosa
    import numpy as np
    from tensorflow.keras.utils import to_categorical

    # Load YAMNet Model
    yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
    yamnet_model = hub.load(yamnet_model_url)
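    # YAMNet maps a mono float32 waveform sampled at 16 kHz to (scores, embeddings,
    # log_mel_spectrogram); the embeddings are one 1024-dimensional vector per ~0.48 s frame.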

    # Function to extract frame-level embeddings from one audio example
    def extract_embedding(audio_example):
        '''Extract YAMNet embeddings from a waveform.'''
        waveform = audio_example["audio"]["array"]
        sample_rate = audio_example["audio"]["sampling_rate"]
        # YAMNet expects mono audio sampled at 16 kHz; resample if necessary
        if sample_rate != 16000:
            waveform = librosa.resample(np.asarray(waveform, dtype=np.float32),
                                        orig_sr=sample_rate, target_sr=16000)
        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
        _, embeddings, _ = yamnet_model(waveform)
        return {"embedding": embeddings.numpy()}

    # Apply embedding extraction to the training and test splits
    train_embeddings = train_dataset.map(extract_embedding)
    test_embeddings = test_dataset.map(extract_embedding)

    X_train, y_train = [], []
    X_test, y_test = [], []

    # Process training data: each YAMNet frame becomes one sample, labelled with its clip's label
    for example in train_embeddings:
        for embedding in example["embedding"]:
            X_train.append(embedding)
            y_train.append(example["label"])

    # Process test data: same frame-level expansion as for the training split
    for example in test_embeddings:
        for embedding in example["embedding"]:
            X_test.append(embedding)
            y_test.append(example["label"])

    # Convert to NumPy arrays
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    # Convert labels to categorical (one-hot encoding)
    y_train_cat = to_categorical(y_train, num_classes=2)
    y_test_cat = to_categorical(y_test, num_classes=2)

    print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")

    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout

    # Define the model
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
    ])
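    # The classifier head takes a single 1024-dimensional YAMNet embedding as input and
    # outputs a softmax over the two classes; YAMNet itself stays frozen and is only used
    # as a feature extractor.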

    model.summary()

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model on YAMNet embeddings
    model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
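    # The test embeddings are passed as validation_data only for monitoring; no callbacks
    # (e.g. early stopping) use them, so they do not influence the trained weights.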

    # Evaluate the classifier head on the frame-level test embeddings
    y_pred = model.predict(X_test)
    y_pred_labels = np.argmax(y_pred, axis=1)

    accuracy = accuracy_score(y_test, y_pred_labels)
    print("Transfer learning model accuracy (frame-level):", accuracy)

    # Predict labels for the test dataset
    # Run YAMNet inference on the raw audio data
    predictions = []

    for audio_data in test_dataset["audio"]:
        # Extract waveform and sampling rate
        waveform = audio_data["array"]
        sample_rate = audio_data["sampling_rate"]

        # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
        if sample_rate != 16000:
            waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

        # Convert waveform to tensor
        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

        # Ensure waveform is 1D
        waveform = tf.squeeze(waveform)

        # Get frame-level embeddings from the frozen YAMNet model, then classify them
        # with the trained dense head
        _, embeddings, _ = yamnet_model(waveform)

        # Calculate the mean of the embeddings across the time dimension
        embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames

        # Reshape embeddings for prediction
        embeddings = embeddings.numpy()  # Convert to NumPy array
        embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)

        # Now predict using your trained model
        scores = model.predict(embeddings)

        # Map the predicted class index to its name (0 -> 'chainsaw', 1 -> 'environment')
        predicted_class_index = np.argmax(scores)
        top_class = "chainsaw" if predicted_class_index == 0 else "environment"
        predictions.append(top_class)

    print("Predictions:", predictions)

    def map_predictions_to_labels(predictions):
        """
        Maps string predictions to numeric labels:
        - "chainsaw" -> 0
        - any other class -> 1
        Args:
            predictions (list of str): List of class name predictions.
        Returns:
            list of int: Mapped numeric labels.
        """
        return [0 if pred == "chainsaw" else 1 for pred in predictions]

    # Map string predictions to numeric labels
    numeric_predictions = map_predictions_to_labels(predictions)

    # Extract true labels (already numeric)
    true_labels = test_dataset["label"]

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, numeric_predictions)
    print("Accuracy:", accuracy)

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------  

    # Stop tracking emissions
    emissions_data = tracker.stop_task()
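    # The tracker reports energy_consumed in kWh and emissions in kg CO2eq, hence the
    # *1000 conversions to Wh and gCO2eq in the results below.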

    # Prepare results dictionary
    username, space_url = get_space_info()
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "accuracy": float(accuracy),
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    print(results)

    return results