audio_ImenMourali

Sleeping

App Files Files Community

ImenMourali committed on Jan 31

Commit

b321cd2

verified ·

1 Parent(s): 0ae53cb

Update tasks/audio.py

Browse files

Files changed (1) hide show

tasks/audio.py +205 -11

tasks/audio.py CHANGED Viewed

@@ -21,13 +21,18 @@ ROUTE = "/audio"
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
-    """
-    Evaluate audio classification for rainforest sound detection.
-    Current Model: Random Baseline
-    - Makes random predictions from the label space (0-1)
-    - Used as a baseline for comparison
-    """
     # Get space info
     username, space_url = get_space_info()
@@ -52,10 +57,199 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
 async def evaluate_audio(request: AudioEvaluationRequest):
+from sklearn.metrics import accuracy_score
+# Score the model's predictions against the dataset's ground-truth labels.
+# NOTE(review): `predictions` and `map_predictions_to_labels` are only introduced
+# further down in this diff -- confirm they are defined before this code runs.
+# Map string predictions to numeric labels
+numeric_predictions = map_predictions_to_labels(predictions)
+# Extract true labels (already numeric)
+true_labels = test_dataset["label"]
+# Calculate accuracy
+accuracy = accuracy_score(true_labels, numeric_predictions)
+print("Accuracy:", accuracy)
     # Get space info
     username, space_url = get_space_info()
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+import tensorflow as tf
+import tensorflow_hub as hub
+import librosa
+import numpy as np
+import os
+# Load the YAMNet model from TensorFlow Hub
+yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
+yamnet = hub.load(yamnet_model_url)
+# Download YAMNet class map CSV file
+# (`labels` is later handed to load_class_names, which opens it as a file path.)
+labels_path = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"
+labels = tf.keras.utils.get_file("yamnet_class_map.csv", labels_path)
+# Read class names from the downloaded CSV
+def load_class_names(csv_file_path):
+    """Read class display names from a class-map CSV file.
+
+    The file is parsed with the ``csv`` module rather than a plain
+    ``split(",")`` so that quoted display names containing commas
+    (for example ``"Child speech, kid speaking"``) are returned whole
+    instead of being truncated to the text after the last comma.
+
+    Args:
+        csv_file_path: Path to a CSV file whose first row is a header and
+            whose last column holds the class name.
+
+    Returns:
+        list of str: One class name per non-empty data row, in file order.
+        An empty (or header-only) file yields an empty list.
+    """
+    import csv  # local import: the file's top-level import block is not visible here
+
+    # newline="" lets the csv module handle line endings inside quoted fields.
+    with open(csv_file_path, "r", newline="", encoding="utf-8") as file:
+        reader = csv.reader(file)
+        next(reader, None)  # Skip the header; tolerate an empty file
+        # The class name is the last column of each row; blank rows are ignored.
+        return [row[-1].strip() for row in reader if row]
+yamnet_classes = load_class_names(labels)
+# Define a function for YAMNet inference
+def yamnet_inference(file_name):
+    """Run YAMNet on one audio file and return its averaged class scores.
+
+    Args:
+        file_name: Path of the audio file to load.
+
+    Returns:
+        The YAMNet scores averaged over axis 0 as a NumPy array, or ``None``
+        if loading or inference raised an exception (the error is printed).
+    """
+    try:
+        # Load the audio file and resample to 16kHz (YAMNet's expected sample rate)
+        waveform, _ = librosa.load(file_name, sr=16000)
+        # Peak-normalize the audio. An all-zero (silent) clip has a peak of 0;
+        # dividing by it would turn every sample into NaN, so leave it untouched.
+        peak = np.max(np.abs(waveform))
+        if peak > 0:
+            waveform = waveform / peak
+        # Convert to tensor
+        waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
+        # Predict the scores and embeddings from YAMNet
+        scores, embeddings, spectrogram = yamnet(waveform)
+        # Average the scores across time frames to get a single prediction for the entire audio
+        return tf.reduce_mean(scores, axis=0).numpy()
+    except Exception as e:
+        # Deliberate best-effort: report the failing file and let the caller
+        # handle the missing prediction (see get_top_class's "Error" branch).
+        print(f"Error processing file {file_name}: {e}")
+        return None
+# Function to map predictions to class names
+def get_top_class(predictions):
+    """Translate a score vector into the name of its highest-scoring class.
+
+    Args:
+        predictions: Per-class scores, or ``None`` when inference failed.
+
+    Returns:
+        str: ``"Error"`` when ``predictions`` is ``None``; otherwise the entry
+        of ``yamnet_classes`` at the arg-max index, or ``"Unknown"`` when that
+        index falls outside ``yamnet_classes``.
+    """
+    if predictions is None:
+        return "Error"
+    best_index = np.argmax(predictions)
+    if best_index < len(yamnet_classes):
+        return yamnet_classes[best_index]
+    return "Unknown"
+import tensorflow as tf
+import tensorflow_hub as hub
+import numpy as np
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.utils import to_categorical
+from datasets import DatasetDict
+# Load YAMNet Model
+# NOTE(review): this is the same TF Hub URL that is already loaded into `yamnet`
+# earlier in this diff; presumably one shared handle would suffice -- confirm.
+yamnet_model_url = "https://tfhub.dev/google/yamnet/1"
+yamnet_model = hub.load(yamnet_model_url)
+# Function to extract embeddings from audio
+def extract_embedding(audio_example):
+    """Compute YAMNet embeddings for one dataset example.
+
+    Args:
+        audio_example: Mapping whose ``["audio"]["array"]`` entry holds the
+            waveform samples.
+
+    Returns:
+        dict: ``{"embedding": ...}`` holding the model's embeddings output
+        converted to a NumPy array.
+    """
+    samples = tf.convert_to_tensor(audio_example["audio"]["array"], dtype=tf.float32)
+    # yamnet_model returns (scores, embeddings, spectrogram); only the embeddings are kept.
+    _, clip_embeddings, _ = yamnet_model(samples)
+    return {"embedding": clip_embeddings.numpy()}
+# Build the classifier's training and test arrays from YAMNet embeddings.
+# NOTE(review): `train_test` is not defined anywhere in the visible diff --
+# presumably a DatasetDict with "train" and "test" splits; confirm.
+# Apply embedding extraction to training data
+train_embeddings = train_test["train"].map(extract_embedding)
+# Apply embedding extraction to testing data
+test_embeddings = train_test["test"].map(extract_embedding)
+X_train, y_train = [], []
+X_test, y_test = [], []
+# Process Training Data
+# Every row of an example's embedding becomes its own sample and is tagged
+# with that example's label.
+for example in train_embeddings:
+    for embedding in example["embedding"]:
+        X_train.append(embedding)
+        y_train.append(example["label"])
+# Process Testing Data
+for example in test_embeddings:
+    for embedding in example["embedding"]:
+        X_test.append(embedding)
+        y_test.append(example["label"])
+# Convert to NumPy arrays
+X_train = np.array(X_train)
+y_train = np.array(y_train)
+X_test = np.array(X_test)
+y_test = np.array(y_test)
+# Convert labels to categorical (one-hot encoding)
+y_train_cat = to_categorical(y_train, num_classes=2)
+y_test_cat = to_categorical(y_test, num_classes=2)
+print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout
+# Small dense classifier trained on the embedding rows built above.
+# Define the model
+model = Sequential([
+    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
+    Dropout(0.3),
+    Dense(64, activation='relu'),
+    Dropout(0.3),
+    Dense(2, activation='softmax')  # 2 classes: chainsaw (0) vs. environment (1)
+])
+model.summary()
+# Compile the model
+model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
+# Train the model on YAMNet embeddings
+# NOTE(review): the test split is passed as validation_data here and is also
+# the data scored for the accuracy printed below.
+model.fit(X_train, y_train_cat, epochs=20, batch_size=16, validation_data=(X_test, y_test_cat))
+# Evaluate the model
+y_pred = model.predict(X_test)
+# Pick the higher-scoring of the two softmax outputs as the predicted label.
+y_pred_labels = np.argmax(y_pred, axis=1)
+from sklearn.metrics import accuracy_score
+accuracy = accuracy_score(y_test, y_pred_labels)
+print("Transfer Learning Model Accuracy:", accuracy)
+# Predict labels for the test dataset
+# Run YAMNet inference on the raw audio data
+# Produces one "chainsaw"/"environment" string per entry of test_dataset["audio"].
+predictions = []
+for audio_data in test_dataset["audio"]:
+    # Extract waveform and sampling rate
+    waveform = audio_data["array"]
+    sample_rate = audio_data["sampling_rate"]
+    # Resample the waveform to 16kHz (YAMNet's expected sample rate) if necessary
+    if sample_rate != 16000:
+        waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)
+    # Convert waveform to tensor
+    waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)
+    # Ensure waveform is 1D
+    waveform = tf.squeeze(waveform)
+    # Predict with YAMNet--->model
+    # Get YAMNet embeddings
+    _, embeddings, _ = yamnet_model(waveform)  # Using the original yamnet_model for embedding extraction
+    # Calculate the mean of the embeddings across the time dimension
+    # NOTE(review): the classifier was fitted on individual embedding rows, while
+    # here it receives their mean -- confirm this mismatch is intended.
+    embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across time frames
+    # Reshape embeddings for prediction
+    embeddings = embeddings.numpy()  # Convert to NumPy array
+    embeddings = embeddings.reshape(1, -1)  # Reshape to (1, embedding_dimension)
+    # Now predict using your trained model
+    # (model.predict is invoked once per clip, on a single-row batch.)
+    scores = model.predict(embeddings)
+    # Get predicted class
+    predicted_class_index = np.argmax(scores)
+    predicted_class_label = predicted_class_index  # Assuming 0 for 'chainsaw', 1 for 'environment'
+    # Get the top class name using the predicted label
+    top_class = "chainsaw" if predicted_class_label == 0 else "environment"
+    predictions.append(top_class)
+print("Predictions:", predictions)
+def map_predictions_to_labels(predictions):
+    """Convert class-name predictions into binary numeric labels.
+
+    ``"chainsaw"`` becomes 0; every other value becomes 1.
+
+    Args:
+        predictions (list of str): Predicted class names.
+
+    Returns:
+        list of int: The numeric label for each prediction, in input order.
+    """
+    numeric_labels = []
+    for class_name in predictions:
+        numeric_labels.append(1 if class_name != "chainsaw" else 0)
+    return numeric_labels
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE