CAPT-ReadAloud / app.py
seba3y's picture
Upload 2 files
5ac4106
raw
history blame
3.6 kB
import gradio as gr
from audio import predict_all # This is your custom module for predictions
import re # Regular expressions for text processing
# Extra stylesheet handed to gr.Interface(css=...) so that the hand-written
# HTML produced by custom_confidence_bar() is styled:
#   .confidence-section - flex row holding the label and the bar
#   .confidence-label   - bold caption to the left of the bar
#   .confidence-bar     - grey, rounded track spanning the full width
#   .confidence-fill    - coloured portion; its width and background colour
#                         are set inline by custom_confidence_bar(), so the
#                         background-color below only acts as a fallback.
additional_css = """
/* CSS for the confidence bars */
.confidence-section {
display: flex;
align-items: center;
margin-top: 10px;
}
.confidence-label {
margin-right: 10px;
font-weight: bold;
}
.confidence-bar {
height: 20px;
width: 100%;
background-color: #eee;
border-radius: 10px;
margin: 10px 0;
}
.confidence-fill {
height: 100%;
border-radius: 10px;
background-color: #4caf50; /* Change color based on confidence level if desired */
text-align: center;
color: white;
line-height: 20px;
}
/* Additional CSS for styling the rest of your results */
"""
def custom_confidence_bar(confidence):
    """Build the HTML snippet for a coloured model-confidence bar.

    Args:
        confidence: Confidence expressed as a percentage. Values outside
            0-100 are clamped so the fill can neither overflow the track
            nor receive a negative (invalid) CSS width.

    Returns:
        An HTML string that uses the ``confidence-*`` CSS classes, with the
        fill width and colour set through an inline ``style`` attribute.
    """
    # Clamp before anything else: the value is used directly as a CSS width.
    percent = max(0.0, min(100.0, float(confidence)))
    # Green above 75, amber above 50, red otherwise.
    color = "#4caf50" if percent > 75 else "#FFC107" if percent > 50 else "#F44336"
    # Callers pass ``value * 100``, which produces float noise such as
    # 56.99999999999999; show at most one decimal and drop a trailing ".0"
    # so whole numbers still render as e.g. "80%".
    shown = f"{percent:.1f}".rstrip("0").rstrip(".")
    return f"""
<div class="confidence-section">
<span class="confidence-label">Model Confidence:</span>
<div class="confidence-bar">
<div class="confidence-fill" style="width: {shown}%; background-color: {color};">
{shown}%
</div>
</div>
</div>
"""
def extract_score_level(message):
    """Pull the score out of a prediction message.

    Looks for the first ``Score: <a>-<b>`` range in *message* and reports its
    lower bound as ``"<a> of 10"``. Returns ``"N/A"`` when the message
    contains no such range.
    """
    found = re.search(r'Score: (\d+)-(\d+)', message)
    if found is None:
        return "N/A"
    lower_bound = found.group(1)
    return f"{lower_bound} of 10"
def message_markdown(label, message, task, score_level):
    """Format one result section as Markdown.

    The section consists of a level-1 heading with *label*, a bold
    "Model Prediction" line carrying *message*, and a bold "<task> Score"
    line carrying *score_level*, each terminated by a newline.
    """
    section_lines = (
        f"# {label}",
        f"**Model Prediction:** {message}",
        f"**{task} Score:** {score_level}",
    )
    return "\n".join(section_lines) + "\n"
def _strip_score_prefix(message):
    """Return *message* without the part up to and including its first comma.

    Only the first comma is treated as the separator, so commas that appear
    inside the remaining description are preserved. A message without any
    comma is returned unchanged.
    """
    _prefix, comma, description = message.partition(",")
    return description.strip() if comma else message


def analyze_audio(audio_data):
    """Run the prediction model on *audio_data* and format the results.

    Args:
        audio_data: Whatever the Gradio audio input passes in; it is
            forwarded untouched to ``predict_all``.

    Returns:
        A 4-tuple ``(accuracy_markdown, accuracy_confidence_html,
        fluency_markdown, fluency_confidence_html)``, matching the order of
        the interface's output components.
    """
    # Assumes predict_all returns ((message, confidence), (message, confidence))
    # for accuracy and fluency respectively -- confirm against the audio module.
    accuracy, fluency = predict_all(audio_data)
    accuracy_message, accuracy_confidence = accuracy
    fluency_message, fluency_confidence = fluency

    # The score has to be read before the score prefix is stripped below.
    accuracy_score = extract_score_level(accuracy_message)
    fluency_score = extract_score_level(fluency_message)

    # Drop the leading score part but keep the whole description, including
    # any further commas it may contain.
    accuracy_message = _strip_score_prefix(accuracy_message)
    fluency_message = _strip_score_prefix(fluency_message)

    # Confidences are scaled to percentages here; presumably predict_all
    # reports them as fractions in [0, 1] -- TODO confirm.
    accuracy_confidence_html = custom_confidence_bar(accuracy_confidence * 100)
    fluency_confidence_html = custom_confidence_bar(fluency_confidence * 100)

    accuracy_markdown = message_markdown(
        'Accuracy of Pronunciation', accuracy_message, 'Pronunciation', accuracy_score
    )
    fluency_markdown = message_markdown(
        'Speaker Fluency', fluency_message, 'Fluency', fluency_score
    )
    return accuracy_markdown, accuracy_confidence_html, fluency_markdown, fluency_confidence_html
# Gradio UI definition: a single audio input is passed to analyze_audio, and
# the four values it returns fill the four output components below, in order.
iface = gr.Interface(
fn=analyze_audio,
inputs=gr.Audio(label="Upload Audio"),
# Order must match analyze_audio's return tuple:
# (accuracy markdown, accuracy bar HTML, fluency markdown, fluency bar HTML).
outputs=[
gr.Markdown(label="Accuracy Score Level"),
gr.HTML(label="Accuracy Confidence"),
gr.Markdown(label="Fluency Score Level"),
gr.HTML(label="Fluency Confidence"),
],
# Styles the confidence-* classes used by the HTML outputs.
css=additional_css,
title="Audio Analysis Tool",
description="Upload an audio file to analyze its accuracy and fluency."
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()