"""Manual smoke tests for ``assign_speaker_for_audio_list``.

Runs the speaker-assignment helper against synthetic noise, against audio
file paths, and against the same audio pre-loaded as NumPy arrays, printing
the predictions and (for the labelled files) an accuracy figure.
"""
import os

import numpy as np
import librosa

from speaker.speaker_identification import assign_speaker_for_audio_list

# Define constants
TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'Test_data_for_clas_Idef')
AUDIO_FILES_DIR = os.path.join(TEST_DATA_DIR, 'enni_audio_files')
NUMPY_FILES_DIR = os.path.join(TEST_DATA_DIR, 'enni_testset_numpy_minimal')
FILEPATHS_DIR = os.path.join(TEST_DATA_DIR, 'enni_testset_filepaths_minimal')

# Sample rate (Hz) requested from librosa when loading the test clips.
_SAMPLE_RATE = 16000

# Filename prefixes that encode the ground-truth label of each clip.
_CHILD_PREFIX = 'child_'
_ADULT_PREFIX = 'adult_'


def generate_fake_audio_test_set(num_samples=10, length=16000, random_seed=42):
    """
    Generate a synthetic test set of fake audio signals (numpy arrays).

    Note: this reseeds NumPy's global random state as a side effect.

    Args:
        num_samples (int): Number of audio samples.
        length (int): Length of each audio sample (e.g., 1 second at 16kHz).
        random_seed (int): Seed for reproducibility.

    Returns:
        List[np.ndarray]: List of fake audio signals.
    """
    np.random.seed(random_seed)
    return [np.random.randn(length) for _ in range(num_samples)]


def _list_wav_files(audio_dir, prefix):
    """Return full paths of ``<prefix>*.wav`` files in ``audio_dir``, sorted by name.

    ``os.listdir`` yields entries in arbitrary order; sorting keeps the
    printed examples reproducible between runs and machines.
    """
    return [
        os.path.join(audio_dir, name)
        for name in sorted(os.listdir(audio_dir))
        if name.startswith(prefix) and name.endswith('.wav')
    ]


def _expected_speaker_id(path):
    """Return the expected speaker label for a clip, based on its file name.

    Only the base name is inspected, using the same ``startswith`` rule that
    selects the files, so a ``child_`` substring elsewhere in the path (or
    later in an adult file name) cannot flip the expected label.
    """
    if os.path.basename(path).startswith(_CHILD_PREFIX):
        return "Speaker_id_0"
    return "Speaker_id_1"


def _report_predictions(paths, speaker_ids, label_suffix=""):
    """Print the first five predictions and the overall accuracy.

    Args:
        paths: Paths (or file names) of the evaluated clips, in input order.
        speaker_ids: Predicted speaker ids, aligned with ``paths``.
        label_suffix: Text appended after the file name in each example line.
    """
    total = len(paths)
    correct = 0
    # zip() stops at the shorter sequence, so any missing predictions are
    # never counted as correct while still contributing to the denominator.
    for i, (path, speaker_id) in enumerate(zip(paths, speaker_ids)):
        expected = _expected_speaker_id(path)
        is_correct = speaker_id == expected
        if is_correct:
            correct += 1

        # Print only the first 5 examples to avoid cluttering the output
        if i < 5:
            print(f"{i+1}. {os.path.basename(path)}{label_suffix}: {speaker_id} (Expected: {expected}) {'✓' if is_correct else '✗'}")

    # Guard against an empty data set to avoid dividing by zero.
    accuracy = correct / total * 100 if total else 0
    print(f"Accuracy: {correct}/{total} ({accuracy:.2f}%)")


def test_file_paths():
    """Test with all real audio files from the dataset, passed as file paths."""
    child_files = _list_wav_files(AUDIO_FILES_DIR, _CHILD_PREFIX)
    adult_files = _list_wav_files(AUDIO_FILES_DIR, _ADULT_PREFIX)

    # Create list with known order: all child clips, then all adult clips
    audio_list = child_files + adult_files

    # Get speaker IDs
    speaker_ids = assign_speaker_for_audio_list(audio_list)

    # Print results
    print("\n--- Testing with file paths ---")
    print(f"Testing {len(audio_list)} audio files: {len(child_files)} child files and {len(adult_files)} adult files")
    _report_predictions(audio_list, speaker_ids)


def test_numpy_arrays():
    """Test with NumPy arrays by loading all audio files."""
    child_files = _list_wav_files(AUDIO_FILES_DIR, _CHILD_PREFIX)
    adult_files = _list_wav_files(AUDIO_FILES_DIR, _ADULT_PREFIX)

    # librosa.load returns (waveform, sample_rate); only the waveform is kept.
    child_arrays = [librosa.load(f, sr=_SAMPLE_RATE)[0] for f in child_files]
    adult_arrays = [librosa.load(f, sr=_SAMPLE_RATE)[0] for f in adult_files]

    # Create list with known order: all child clips, then all adult clips
    audio_list = child_arrays + adult_arrays

    # Get speaker IDs
    speaker_ids = assign_speaker_for_audio_list(audio_list)

    # Print results
    print("\n--- Testing with NumPy arrays ---")
    print(f"Testing {len(audio_list)} audio arrays: {len(child_arrays)} child arrays and {len(adult_arrays)} adult arrays")
    # The arrays carry no label, so score them via their source file names.
    _report_predictions(child_files + adult_files, speaker_ids, label_suffix=" (as array)")


def main():
    """Run the synthetic, file-path and NumPy-array checks in sequence."""
    # Test with synthetic data
    print("--- Testing with synthetic data ---")
    audio_list = generate_fake_audio_test_set(num_samples=5)
    speaker_ids = assign_speaker_for_audio_list(audio_list)
    print(f"Synthetic data predictions: {speaker_ids}")

    # Each real-data check is best-effort: a failure in one (e.g. missing
    # test data) is reported and the next check still runs.
    # Test with real files
    try:
        test_file_paths()
    except Exception as e:
        print(f"Error testing file paths: {e}")

    # Test with NumPy arrays
    try:
        test_numpy_arrays()
    except Exception as e:
        print(f"Error testing NumPy arrays: {e}")


if __name__ == "__main__":
    main()