Update tasks/audio.py
tasks/audio.py  CHANGED  +43 -150
@@ -1,196 +1,89 @@
+import tensorflow as tf
+import tensorflow_hub as hub
+import numpy as np
+import librosa
+import os
+import tarfile
+from tensorflow.keras.models import load_model
 from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
-import random
-import os
-
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
-
 from dotenv import load_dotenv
+
 load_dotenv()

 router = APIRouter()
-
 DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"

-
-
+# Define paths for local model files
+YAMNET_TAR_PATH = "./yamnet-tensorflow2-yamnet-v1.tar.gz"  # Ensure this is in the correct directory
+EXTRACT_PATH = "./yamnet_model"
+CLASSIFIER_PATH = "./audio_model.h5"
+
+# Extract YAMNet if it is not already extracted
+if not os.path.exists(EXTRACT_PATH):
+    with tarfile.open(YAMNET_TAR_PATH, "r:gz") as tar:
+        tar.extractall(EXTRACT_PATH)
+
+# Load YAMNet
+yamnet = hub.load(EXTRACT_PATH)
+
+# Load trained classifier
+audio_model = load_model(CLASSIFIER_PATH)
+
+
+@router.post(ROUTE, tags=["Audio Task"], description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
-
-    #
+    """Inference function to classify audio samples using a pre-trained model."""
+    # Load dataset
     dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
-
-    # Split dataset
-    train_test = dataset["train"]
     test_dataset = dataset["test"]

     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")

-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE CODE HERE
-    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
-    #--------------------------------------------------------------------------------------------
-    import tensorflow as tf
-    import tensorflow_hub as hub
-    import librosa
-    import numpy as np
-    from sklearn.model_selection import train_test_split
-    from tensorflow.keras.utils import to_categorical
-
-    # Load YAMNet Model
-    yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
-    yamnet_model = hub.load(yamnet_model_url)
-
-    # Function to extract embeddings from audio
-    def extract_embedding(audio_example):
-        '''Extract YAMNet embeddings from a waveform'''
-        waveform = audio_example["audio"]["array"]  # Ensure correct key reference
-        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
-        scores, embeddings, spectrogram = yamnet_model(waveform)
-        return {"embedding": embeddings.numpy()}
-
-    # Apply embedding extraction to training data
-    train_embeddings = dataset["train"].map(extract_embedding)
-
-    # Apply embedding extraction to testing data
-    test_embeddings = dataset["test"].map(extract_embedding)
-
-    X_train, y_train = [], []
-    X_test, y_test = [], []
-
-    # Process Training Data
-    for example in train_embeddings:
-        for embedding in example["embedding"]:
-            X_train.append(embedding)
-            y_train.append(example["label"])
-
-    # Process Testing Data
-    for example in test_embeddings:
-        for embedding in example["embedding"]:
-            X_test.append(embedding)
-            y_test.append(example["label"])
-
-    # Convert to NumPy arrays
-    X_train = np.array(X_train)
-    y_train = np.array(y_train)
-    X_test = np.array(X_test)
-    y_test = np.array(y_test)
-
-    # Convert labels to categorical (one-hot encoding)
-    y_train_cat = to_categorical(y_train, num_classes=2)
-    y_test_cat = to_categorical(y_test, num_classes=2)
-
-    print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")
-
-    from tensorflow.keras.models import Sequential
-    from tensorflow.keras.layers import Dense, Dropout
-
-    # Define the model
-    model = Sequential([
-        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
-        Dropout(0.3),
-        Dense(64, activation='relu'),
-        Dropout(0.3),
-        Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
-    ])
-
-    model.summary()
-
-    # Compile the model
-    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
-
-    # Train the model on YAMNet embeddings
-    model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
-
-    # Evaluate the model
-    y_pred = model.predict(X_test)
-    y_pred_labels = np.argmax(y_pred, axis=1)
-
-    from sklearn.metrics import accuracy_score
-    accuracy = accuracy_score(y_test, y_pred_labels)
-    print("Transfer Learning Model Accuracy:", accuracy)
-
-    # Predict labels for the test dataset
-    # Run YAMNet inference on the raw audio data
     predictions = []
-
     for audio_data in test_dataset["audio"]:
         # Extract waveform and sampling rate
         waveform = audio_data["array"]
         sample_rate = audio_data["sampling_rate"]

-        # Resample
+        # Resample if needed
         if sample_rate != 16000:
             waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

-        # Convert
+        # Convert to tensor
         waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
+        waveform = tf.squeeze(waveform)  # Ensure waveform is 1D

-        #
-
-
-        # Predict with YAMNet--->model
-        # Get YAMNet embeddings
-        _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction
-
-        # Calculate the mean of the embeddings across the time dimension
-        embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames
+        # Extract embeddings from YAMNet
+        _, embeddings, _ = yamnet(waveform)
+        embeddings = tf.reduce_mean(embeddings, axis=0).numpy()  # Average over time

-        # Reshape embeddings for
-        embeddings = embeddings.
-        embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)
+        # Reshape embeddings for classifier input
+        embeddings = embeddings.reshape(1, -1)

-        #
-        scores =
-
-        # Get predicted class
+        # Predict using the trained classifier
+        scores = audio_model.predict(embeddings)
         predicted_class_index = np.argmax(scores)
-        predicted_class_label = predicted_class_index
-
-        # Get the top class name using the predicted label
-        top_class = "chainsaw" if predicted_class_label == 0 else "environment"
-        predictions.append(top_class)
-
-    print("Predictions:", predictions)
-
-    def map_predictions_to_labels(predictions):
-        """
-        Maps string predictions to numeric labels:
-        - "chainsaw" -> 0
-        - any other class -> 1
-        Args:
-            predictions (list of str): List of class name predictions.
-        Returns:
-            list of int: Mapped numeric labels.
-        """
-        return [0 if pred == "chainsaw" else 1 for pred in predictions]
+        predicted_class_label = "chainsaw" if predicted_class_index == 0 else "environment"
+        predictions.append(predicted_class_label)

     # Map string predictions to numeric labels
-    numeric_predictions =
-
-    # Extract true labels (already numeric)
+    numeric_predictions = [0 if pred == "chainsaw" else 1 for pred in predictions]
     true_labels = test_dataset["label"]
-
-    # Calculate accuracy
     accuracy = accuracy_score(true_labels, numeric_predictions)
-    print("Accuracy:", accuracy)
-
-    #--------------------------------------------------------------------------------------------
-    # YOUR MODEL INFERENCE STOPS HERE
-    #--------------------------------------------------------------------------------------------

     # Stop tracking emissions
     emissions_data = tracker.stop_task()

-    # Prepare results
+    # Prepare results
     results = {
-        "username": username,
-        "space_url": space_url,
         "submission_timestamp": datetime.now().isoformat(),
         "model_description": DESCRIPTION,
         "accuracy": float(accuracy),
@@ -205,4 +98,4 @@ async def evaluate_audio(request: AudioEvaluationRequest):
         }
     }

-
+    return results