Spaces:

ajaykarthick
/

audio_deepfake_detection

Runtime error

audio_deepfake_detection / src /features /extraction /prosodic_features.py

Ajay Karthick Senthil Kumar

add src

381c43b about 1 year ago

4.89 kB

	import librosa
	import parselmouth
	from parselmouth.praat import call

	import numpy as np


	class ProsodicFeatureExtractor:
	    """
	    Extracts various prosodic features from audio data.

	    Two views of the signal are stored: ``y``/``sr`` feed the librosa-based
	    extractors (F0, energy), while ``audio_arr``/``orig_sr`` feed the
	    Praat/parselmouth-based extractors (speaking rate, pauses, formants).

	    Attributes:
	        y (numpy.array): Audio time series.
	        sr (int): Sampling rate of the audio time series.
	        audio_arr (numpy.array): Original audio array for parselmouth processing.
	        orig_sr (int): Original sampling rate of the audio array.

	    Methods:
	        extract(features_to_extract=None): Extracts specified prosodic features.
	        extract_f0(): Extracts fundamental frequency (F0) from audio.
	        extract_energy(): Extracts RMS energy from audio.
	        extract_speaking_rate(): Estimates a loudness-based speaking rate.
	        extract_pauses(): Detects pauses from audio.
	        extract_formants(): Extracts formant statistics from audio.
	    """

	    def __init__(self, y: np.ndarray, sr: int, audio_arr: np.ndarray, orig_sr: int):
	        """
	        Initializes the ProsodicFeatureExtractor with audio data.

	        Args:
	            y: Audio time series used by the librosa-based extractors.
	            sr: Sampling rate of ``y``.
	            audio_arr: Audio samples used to build the parselmouth ``Sound``.
	            orig_sr: Sampling rate of ``audio_arr``.
	        """
	        self.y = y
	        self.sr = sr
	        self.audio_arr = audio_arr
	        self.orig_sr = orig_sr

	    def _make_sound(self) -> "parselmouth.Sound":
	        """Builds a parselmouth ``Sound`` from ``audio_arr`` at ``orig_sr``."""
	        return parselmouth.Sound(self.audio_arr, sampling_frequency=self.orig_sr)

	    def extract(self, features_to_extract=None) -> dict:
	        """
	        Extracts the specified prosodic features.

	        Args:
	            features_to_extract (list | str, optional): Feature names to extract
	                ('f0', 'energy', 'speaking_rate', 'pauses', 'formants'). A single
	                name may be given as a plain string. Defaults to all available
	                features if None. Unknown names are ignored.

	        Returns:
	            dict: Maps each requested feature name to the value returned by
	            its extractor (e.g. 'formants' maps to a dict of statistics).
	        """
	        feature_funcs = {
	            'f0': self.extract_f0,
	            'energy': self.extract_energy,
	            'speaking_rate': self.extract_speaking_rate,
	            'pauses': self.extract_pauses,
	            'formants': self.extract_formants,
	        }

	        if features_to_extract is None:
	            features_to_extract = list(feature_funcs)
	        elif isinstance(features_to_extract, str):
	            # A bare string would otherwise be iterated character by
	            # character and silently yield an empty result.
	            features_to_extract = [features_to_extract]

	        features = {}
	        for feature in features_to_extract:
	            extractor = feature_funcs.get(feature)
	            if extractor is None:
	                continue
	            result = extractor()
	            if isinstance(result, tuple):
	                # None of the extractors in this class returns a tuple; the
	                # branch is kept so an overriding extractor that returns a
	                # tuple of (key, value) pairs is still merged as before.
	                features.update(result)
	            else:
	                features[feature] = result

	        return features

	    def extract_f0(self) -> np.ndarray:
	        """
	        Extracts the fundamental frequency (F0) using the PYIN algorithm.

	        The search range is C2..C7. Frames where pYIN reports NaN are
	        returned as 0.0.

	        Returns:
	            numpy.array: Per-frame F0 estimates in Hz.
	        """
	        # librosa.pyin assumes sr=22050 unless told otherwise, which skews the
	        # estimates for audio at any other rate; forward the real rate when
	        # one is available.
	        pyin_kwargs = {} if self.sr is None else {'sr': self.sr}
	        f0, _voiced_flag, _voiced_probs = librosa.pyin(
	            self.y,
	            fmin=librosa.note_to_hz('C2'),
	            fmax=librosa.note_to_hz('C7'),
	            **pyin_kwargs,
	        )
	        return np.nan_to_num(f0)

	    def extract_energy(self) -> np.ndarray:
	        """
	        Extracts the root-mean-square (RMS) energy from the audio.

	        Returns:
	            numpy.array: Per-frame RMS values (first row of librosa's output).
	        """
	        return librosa.feature.rms(y=self.y)[0]

	    def extract_speaking_rate(self):
	        """
	        Estimates a speaking rate from the Praat intensity contour.

	        NOTE: this counts the intensity frames whose value exceeds 30% of the
	        contour's maximum and divides by the total duration. It is a
	        loudness-based proxy, not a count of detected syllable nuclei.

	        Returns:
	            float | None: Loud frames per second, or None if extraction fails
	            (the error is printed).
	        """
	        try:
	            snd = self._make_sound()
	            total_duration = snd.get_total_duration()
	            frame_values = np.asarray(snd.to_intensity().values).ravel()
	            threshold = 0.3 * frame_values.max()
	            loud_frames = int(np.count_nonzero(frame_values > threshold))
	            return loud_frames / total_duration
	        except Exception as e:
	            print(f'Error extracting speaking rate: {e}')
	            return None

	    def extract_pauses(self):
	        """
	        Identifies and timestamps pauses in the audio.

	        Returns:
	            list[tuple[float, float]] | None: (start, end) times of every
	            interval Praat labels "silent", or None if extraction fails
	            (the error is printed).
	        """
	        try:
	            snd = self._make_sound()
	            # Arguments, per Praat's "To TextGrid (silences)" order: minimum
	            # pitch (Hz), time step (0 = auto), silence threshold (dB), minimum
	            # silent / sounding interval durations (s), then the two labels.
	            silences = call(
	                snd, "To TextGrid (silences)",
	                100, 0, -25, 0.1, 0.1, "silent", "sounding",
	            )
	            interval_count = call(silences, "Get number of intervals", 1)
	            pauses = []
	            for idx in range(1, interval_count + 1):  # Praat indices are 1-based
	                if call(silences, "Get label of interval", 1, idx) != "silent":
	                    continue
	                start = call(silences, "Get start time of interval", 1, idx)
	                end = call(silences, "Get end time of interval", 1, idx)
	                pauses.append((start, end))
	            return pauses
	        except Exception as e:
	            print(f'Error extracting pauses: {e}')
	            return None

	    def extract_formants(self) -> dict:
	        """
	        Extracts statistics of the first three formants using the Burg method.

	        Returns:
	            dict: Keys 'F{i}_mean' and 'F{i}_stdev' for i in 1..3, in Hertz.
	            An empty dict is returned if extraction fails (the error is
	            printed).
	        """
	        try:
	            snd = self._make_sound()
	            # Arguments, per Praat's "To Formant (burg)" order: time step (s),
	            # max number of formants, formant ceiling (Hz), window length (s),
	            # pre-emphasis from (Hz).
	            formant = call(snd, "To Formant (burg)", 0.025, 5, 5500, 0.025, 50)
	            formant_values = {}
	            for i in range(1, 4):  # Extracting the first three formants
	                # Time range 0, 0 asks Praat to use the whole sound.
	                formant_values[f'F{i}_mean'] = call(
	                    formant, "Get mean", i, 0, 0, "Hertz")
	                formant_values[f'F{i}_stdev'] = call(
	                    formant, "Get standard deviation", i, 0, 0, "Hertz")
	            return formant_values
	        except Exception as e:
	            print(f'Error extracting formants: {e}')
	            return {}