Spaces:

seba3y
/

CAPT-ReadAloud

Sleeping

App Files Files Community

CAPT-ReadAloud / scoring.py

seba3y

Upload 5 files

c13f0a5 over 1 year ago

raw

history blame

5.2 kB

	import logging

	import librosa
	import numpy as np

	def calculate_expected_value(scores):
	    """
	    Return the expected value of ``scores`` under their empirical distribution.

	    Each distinct score is weighted by its relative frequency, i.e. the number
	    of times it occurs divided by ``len(scores)``.

	    :param scores: List of outcomes (numeric values).
	    :return: Sum over the distinct scores of ``score * relative_frequency``.
	    """
	    distinct_values, occurrences = np.unique(scores, return_counts=True)

	    # The relative frequency of every distinct value acts as its probability.
	    weights = occurrences / len(scores)

	    return np.dot(distinct_values, weights)


	def calculate_fluency_score(audio_path, transcription, word_pronunciation_scores, base_script_len):
	    """
	    Score the fluency of a read-aloud recording.

	    Four components are combined with fixed weights: speech rate (0.20),
	    speaking ratio (0.20), average pronunciation (0.50) and pronunciation
	    consistency (0.10). The weighted sum is mapped with ``10 + combined * 80``.

	    :param audio_path: Path of the recording, passed to ``librosa.load``.
	    :param transcription: Recognised text; it is split on whitespace to count
	        the spoken words.
	    :param word_pronunciation_scores: Per-word pronunciation scores. The
	        ``(avg - 1) / 2`` normalisation suggests a 1-3 scale -- TODO confirm
	        against the caller.
	    :param base_script_len: Number of words in the reference script.
	    :return: ``10`` when the reference length is not positive, when fewer than
	        15% of the script words were spoken, when the average pronunciation
	        score is below 1.3, or when the recording is empty; otherwise
	        ``10 + combined * 80``.
	    """
	    total_words = len(transcription.split())
	    avg_pronunciation_score = calculate_expected_value(word_pronunciation_scores)

	    if base_script_len <= 0:
	        # Script coverage cannot be computed; give the minimum score instead
	        # of failing with ZeroDivisionError.
	        return 10

	    script_coverage = total_words / base_script_len
	    if script_coverage < 0.15 or avg_pronunciation_score < 1.3:
	        return 10

	    audio, sr = librosa.load(audio_path)
	    total_duration = len(audio) / sr
	    if total_duration == 0:
	        # An empty recording has no speaking ratio (it would divide by zero).
	        return 10

	    non_silent_intervals = librosa.effects.split(audio, top_db=22)
	    non_silent_duration = sum(end - start for start, end in non_silent_intervals) / sr

	    # With four words or fewer the spoken time is treated as zero, which
	    # drives both timing components below down to 0.
	    non_silent_duration = non_silent_duration if total_words > 4 else 0

	    # 120-140 words per minute, expressed in words per second.
	    ideal_min_rate, ideal_max_rate = 120 / 60, 140 / 60
	    # Words per second of actual speech, scaled by the share of the script read.
	    # The epsilon avoids dividing by zero when no speech time is counted.
	    actual_speech_rate = (total_words / (non_silent_duration + 1e-7)) * script_coverage
	    speaking_ratio = non_silent_duration / total_duration

	    if ideal_min_rate <= actual_speech_rate <= ideal_max_rate:
	        # Within the ideal range
	        speech_rate_score = 1
	    elif actual_speech_rate < ideal_min_rate:
	        # Too slow: proportional to how close the rate is to the lower bound
	        speech_rate_score = actual_speech_rate / ideal_min_rate
	    else:
	        # Too fast: decreases linearly past the upper bound
	        speech_rate_score = 2 - (actual_speech_rate / ideal_max_rate)
	    # Clamp the score between 0 and 1
	    speech_rate_score = max(0, min(speech_rate_score, 1))

	    # If speaking ratio is significantly less than the gold standard, reduce the fluency score
	    gold_standard_ratio = 0.9  # Assuming 90% speaking time is gold standard for natural speech
	    speaking_ratio_score = min(speaking_ratio / gold_standard_ratio, 1)

	    # Pronunciation score calculation
	    avg_pronunciation_score = (avg_pronunciation_score - 1) / 2
	    if len(word_pronunciation_scores) > 1:
	        pronunciation_variance = np.var(word_pronunciation_scores, ddof=1)
	    else:
	        # The sample variance (ddof=1) of a single value is NaN and would make
	        # the whole score NaN; a single score has no spread.
	        pronunciation_variance = 0.0

	    # Weighted combination of scores
	    # Adjust weights as needed
	    weight_speech_rate = 0.20
	    weight_speaking_ratio = 0.20
	    weight_pronunciation = 0.50
	    weight_pronunciation_variance = 0.10

	    combined_score = (speech_rate_score * weight_speech_rate +
	                      speaking_ratio_score * weight_speaking_ratio +
	                      avg_pronunciation_score * weight_pronunciation +
	                      (1 / (1 + pronunciation_variance)) * weight_pronunciation_variance)

	    # Scale the combined score: a combined score in [0, 1] maps to 10-90
	    scaled_fluency_score = 10 + combined_score * 80

	    return scaled_fluency_score

	def calculate_pronunciation_accuracy(word_pronunciation_scores, fluency_score, base_script_len):
	    """
	    Map per-word pronunciation scores, adjusted by fluency, to a percentage.

	    The average word score is multiplied by ``fluency_score / 100`` and the
	    product is bucketed into a score-guide level from 1 to 5, which is then
	    scaled to 10, 30, 50, 70 or 90.

	    :param word_pronunciation_scores: Per-word pronunciation scores.
	    :param fluency_score: Fluency score; it is divided by 100 before use.
	    :param base_script_len: Number of words in the reference script.
	    :return: ``10`` when the reference length is not positive or when scores
	        exist for fewer than 25% of the script words; otherwise
	        ``10 + (level - 1) * 20``.
	    """
	    # A non-positive reference length gives the minimum score instead of
	    # failing with ZeroDivisionError.
	    if base_script_len <= 0 or len(word_pronunciation_scores) / base_script_len < 0.25:
	        return 10

	    logger = logging.getLogger(__name__)

	    # Calculate average word pronunciation score
	    avg_pronunciation_score = calculate_expected_value(word_pronunciation_scores)

	    # Adjust pronunciation score based on fluency
	    # This is a simplistic adjustment. It can be refined based on more detailed analysis
	    fluency_adjustment = fluency_score / 100
	    adjusted_pronunciation_score = avg_pronunciation_score * fluency_adjustment
	    logger.debug(
	        "avg_pronunciation=%s fluency_adjustment=%s adjusted_pronunciation=%s",
	        avg_pronunciation_score,
	        fluency_adjustment,
	        adjusted_pronunciation_score,
	    )

	    # Map to the 1-5 score-guide level
	    # These thresholds can be adjusted based on empirical data or further analysis
	    if adjusted_pronunciation_score >= 2.4:
	        score_guide_level = 5
	    elif adjusted_pronunciation_score >= 1.7:
	        score_guide_level = 4
	    elif adjusted_pronunciation_score >= 1.0:
	        score_guide_level = 3
	    elif adjusted_pronunciation_score >= 0.5:
	        score_guide_level = 2
	    else:
	        score_guide_level = 1

	    # Scale to 10% - 90%: each level is worth 20 points
	    final_score = 10 + (score_guide_level - 1) * 20

	    return final_score

	def calculate_fluency_and_pronunciation(audio_path, transcription, word_pronunciation_scores, base_script_len):
	    """
	    Compute the fluency score and the pronunciation accuracy for one recording.

	    The fluency score is computed first because the pronunciation accuracy
	    takes it as an input.

	    :param audio_path: Forwarded to ``calculate_fluency_score``.
	    :param transcription: Forwarded to ``calculate_fluency_score``.
	    :param word_pronunciation_scores: Forwarded to both scoring functions.
	    :param base_script_len: Forwarded to both scoring functions.
	    :return: Dict with the keys ``'fluency_score'`` and ``'pronunciation_accuracy'``.
	    """
	    fluency = calculate_fluency_score(
	        audio_path,
	        transcription,
	        word_pronunciation_scores,
	        base_script_len,
	    )
	    results = {'fluency_score': fluency}
	    results['pronunciation_accuracy'] = calculate_pronunciation_accuracy(
	        word_pronunciation_scores,
	        fluency,
	        base_script_len,
	    )
	    return results


	if __name__ == "__main__":
	    # Nothing happens when the module is run as a script.
	    pass