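"""Voice analysis helpers: extract per-segment MFCC features from an audio
file, cluster them with DBSCAN, and pick out the most frequent voice."""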
import numpy as np
import librosa
from sklearn.cluster import DBSCAN
from pydub import AudioSegment


def extract_voice_features(audio_path, segment_duration=1000):
    # Load the audio file
    y, sr = librosa.load(audio_path)

    # Extract MFCC features (hop_length is the librosa default frame step)
    hop_length = 512
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length)

    # Convert the segment duration (milliseconds) into a number of MFCC frames;
    # mfccs is indexed by frame, not by raw audio sample
    frames_per_segment = int(segment_duration / 1000 * sr / hop_length)
    num_segments = mfccs.shape[1] // frames_per_segment

    # Average the MFCCs over each segment to get one feature vector per segment
    segments = []
    for i in range(num_segments):
        start = i * frames_per_segment
        end = start + frames_per_segment
        segments.append(np.mean(mfccs[:, start:end], axis=1))
    return np.array(segments)


def remove_nan_features(features):
    return features[~np.isnan(features).any(axis=1)]


def cluster_voices(features):
    # Remove NaN values
    features = remove_nan_features(features)

    if len(features) < 2:
        print("Not enough voice segments for clustering. Assigning all to one cluster.")
        return np.zeros(len(features), dtype=int)

    dbscan = DBSCAN(eps=0.5, min_samples=5, metric='euclidean')
    clusters = dbscan.fit_predict(features)

    if np.all(clusters == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(len(features), dtype=int)

    return clusters


def get_most_frequent_voice(features, clusters):
    # Keep the feature vectors belonging to the largest cluster label
    largest_cluster = max(set(clusters), key=list(clusters).count)
    return features[clusters == largest_cluster]


def process_audio(audio_path, segment_duration=1000):
    features = extract_voice_features(audio_path, segment_duration)
    # Filter NaN rows up front so `features` stays aligned with the labels
    # returned by cluster_voices (which drops the same rows internally)
    features = remove_nan_features(features)
    clusters = cluster_voices(features)
    most_frequent_voice = get_most_frequent_voice(features, clusters)
    return most_frequent_voice, features, clusters
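

if __name__ == "__main__":
    # Minimal usage sketch. "sample.wav" is a placeholder path, not part of
    # the original script; point it at any local audio file to try the
    # pipeline end to end.
    most_frequent_voice, features, clusters = process_audio("sample.wav")
    print(f"Extracted {len(features)} voice segments")
    print(f"Cluster labels: {clusters}")
    print(f"Segments assigned to the dominant voice: {len(most_frequent_voice)}")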