# NOTE: this file was extracted from a web file viewer ("Spaces: Sleeping";
# file size: 4,011 bytes). The viewer's per-line commit-hash column
# (c13f0a5 / 25e2c30 / a830941) and its line-number gutter (1-95) were page
# chrome, not part of the program.
import numpy as np
import librosa
def calculate_expected_value(scores):
    """Return the expected value of ``scores`` under their empirical distribution.

    Every distinct score is weighted by how often it occurs relative to the
    total number of scores, and the weighted scores are summed.
    """
    distinct_values, occurrences = np.unique(scores, return_counts=True)
    # Relative frequency of each distinct score acts as its probability.
    relative_frequency = occurrences / len(scores)
    return np.dot(distinct_values, relative_frequency)
def _speech_rate_score(actual_rate, ideal_min_rate, ideal_max_rate):
    """Score a speech rate against the ideal band (all rates in the same unit)."""
    max_ratio = actual_rate / ideal_max_rate
    if actual_rate <= ideal_max_rate:
        # At or below the ideal ceiling (ideal pace or slow): average the
        # ratios against both band edges, then subtract the fixed 0.167
        # offset. The result drops below zero for very slow rates.
        min_ratio = actual_rate / ideal_min_rate
        return np.mean([max_ratio, min_ratio]) - 0.167
    # Too fast: the score starts just under 0.7 and decays inversely with
    # the overshoot beyond the ideal ceiling.
    return 0.7 / max_ratio


def calculate_fluency_score(audio_path, total_words, word_pronunciation_scores, base_script_len):
    """Compute a fluency score from speech rate, speaking ratio and pronunciation.

    Args:
        audio_path: Path of the recording; passed to ``librosa.load``.
        total_words: Word count of the utterance (presumably the recognised
            words; confirm against the caller).
        word_pronunciation_scores: Per-word pronunciation scores. The
            ``(score - 1) / 2`` normalisation suggests a 1-3 scale
            (assumption; confirm against the scorer).
        base_script_len: Length of the reference script, in the same unit as
            ``total_words``.

    Returns:
        ``10`` when the attempt cannot be scored meaningfully (no reference
        script, less than 15% of the script covered, average pronunciation
        below 1.5, or an empty recording); otherwise ``10 + 80 * combined``
        where ``combined`` is the weighted mix of the three sub-scores.
    """
    # Without a reference script the coverage ratio is undefined (this used
    # to raise ZeroDivisionError); treat it like any other unscorable attempt.
    if base_script_len <= 0:
        return 10

    avg_pronunciation_score = calculate_expected_value(word_pronunciation_scores)
    script_coverage = total_words / base_script_len
    if script_coverage < 0.15 or avg_pronunciation_score < 1.5:
        return 10

    audio, sr = librosa.load(audio_path)
    # An empty recording has zero duration, which would make the speaking
    # ratio below divide by zero.
    if len(audio) == 0:
        return 10

    # Intervals are (start_sample, end_sample) pairs of non-silent audio;
    # top_db=22 is the silence threshold handed to librosa.
    non_silent_intervals = librosa.effects.split(audio, top_db=22)
    non_silent_duration = sum(end - start for start, end in non_silent_intervals) / sr
    total_duration = len(audio) / sr

    # Ideal band: 120-140 words per minute, expressed in words per second.
    ideal_min_rate, ideal_max_rate = 120 / 60, 140 / 60
    # Words per second of actual speech, scaled down by script coverage so a
    # partial reading is not rewarded; the epsilon avoids division by zero
    # when no non-silent audio was detected.
    actual_speech_rate = (total_words / (non_silent_duration + 1e-7)) * script_coverage
    speaking_ratio = non_silent_duration / total_duration

    speech_rate_score = _speech_rate_score(actual_speech_rate, ideal_min_rate, ideal_max_rate)

    # Speaking for less than the gold-standard share of the recording lowers
    # the score; speaking for more is capped at 1.
    gold_standard_ratio = 0.9  # assumes 90% speaking time is natural speech
    speaking_ratio_score = min(speaking_ratio / gold_standard_ratio, 1)

    # Shift and halve the average pronunciation score (a 1-3 scale maps to 0-1).
    normalized_pronunciation = (avg_pronunciation_score - 1) / 2

    # Weighted combination of the three sub-scores; adjust weights as needed.
    weight_speech_rate = 0.30
    weight_speaking_ratio = 0.20
    weight_pronunciation = 0.50
    combined_score = (
        speech_rate_score * weight_speech_rate
        + speaking_ratio_score * weight_speaking_ratio
        + normalized_pronunciation * weight_pronunciation
    )

    # A combined score in [0, 1] maps onto [10, 90].
    return 10 + combined_score * 80
def calculate_pronunciation_accuracy(word_pronunciation_scores, fluency_score, base_script_len, total_words):
    """Blend average word pronunciation with fluency into an accuracy score.

    The average of ``word_pronunciation_scores`` is shifted and halved
    (``(avg - 1) / 2``), ``fluency_score`` is divided by 100, and the two are
    mixed 75% / 25%. The mix is then mapped through ``10 + 80 * mix``.

    ``base_script_len`` and ``total_words`` are accepted but not used by the
    current computation.
    """
    pronunciation_weight, fluency_weight = 0.75, 0.25

    normalized_pronunciation = (calculate_expected_value(word_pronunciation_scores) - 1) / 2
    normalized_fluency = fluency_score / 100

    blended = pronunciation_weight * normalized_pronunciation + fluency_weight * normalized_fluency
    # A blended value in [0, 1] lands in the 10-90 band.
    return 10 + blended * 80
def calculate_fluency_and_pronunciation(audio_path, total_words, word_pronunciation_scores, base_script_len):
    """Compute both scores for one recording and return them in a dict.

    The fluency score is computed first because the pronunciation accuracy
    takes it as an input. The result has the keys ``'fluency_score'`` and
    ``'pronunciation_accuracy'``.
    """
    fluency = calculate_fluency_score(
        audio_path, total_words, word_pronunciation_scores, base_script_len
    )
    accuracy = calculate_pronunciation_accuracy(
        word_pronunciation_scores, fluency, base_script_len, total_words
    )
    return dict(fluency_score=fluency, pronunciation_accuracy=accuracy)
# Script entry point: currently does nothing when the module is run directly.
if __name__ == '__main__':
    pass