roots / app.py
haepada's picture
Create app.py
5693c3d verified
raw
history blame
1.71 kB
import gradio as gr
import numpy as np
import librosa
from transformers import pipeline
# Initialize models (both downloads happen once at import/startup time).
# NOTE(review): "MIT/ast-finetuned-speech-commands-v2" is an Audio Spectrogram
# Transformer fine-tuned on the *Speech Commands* keyword dataset ("yes",
# "stop", ...), not on emotions -- its labels are command words. Confirm this
# model choice is intentional for "emotion" analysis.
emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")
# Korean ASR model (wav2vec2-large XLSR fine-tuned on Korean).
# NOTE(review): wav2vec2 models expect 16 kHz mono input -- verify the audio
# fed to this pipeline is resampled accordingly.
speech_recognizer = pipeline("automatic-speech-recognition",
                             model="kresnik/wav2vec2-large-xlsr-korean")
def analyze_voice(audio_file):
    """Analyze a recorded voice clip.

    Runs audio-classification ("emotion"), Korean speech-to-text, and a
    basic RMS-energy feature extraction on the given recording.

    Args:
        audio_file: Filesystem path to the clip produced by the Gradio
            microphone component. May be ``None`` when the user submits
            without recording anything.

    Returns:
        dict: On success -- top classification label and score, the
        transcribed text, an energy level, and a status message. On
        failure -- a dict with ``"error"`` and ``"status"`` keys, so the
        Gradio JSON output renders the problem instead of the app crashing.
    """
    # Guard: Gradio passes None when no audio was recorded; without this
    # check librosa.load() would raise a confusing TypeError.
    if audio_file is None:
        return {
            "error": "No audio received",
            "status": "Error occurred"
        }
    try:
        # Load at 16 kHz mono: both the AST classifier and the wav2vec2
        # ASR model were trained on 16 kHz audio. librosa's default of
        # 22050 Hz would silently degrade both predictions.
        y, sr = librosa.load(audio_file, sr=16000, mono=True)

        # 1. Voice emotion analysis -- pipeline returns results sorted by
        # score, so the first entry is the top prediction.
        emotions = emotion_analyzer(y)
        primary_emotion = emotions[0]

        # 2. Speech to text (Korean ASR)
        text_result = speech_recognizer(y)

        # 3. Extract audio features -- mean RMS energy over all frames.
        # (The original also computed MFCCs but never used them; dropped.)
        energy = np.mean(librosa.feature.rms(y=y))

        return {
            "emotion": primary_emotion['label'],
            "emotion_probability": f"{primary_emotion['score']:.2f}",
            "transcribed_text": text_result['text'],
            "energy_level": f"{energy:.2f}",
            "status": "Analysis complete"
        }
    except Exception as e:
        # Broad catch is intentional at this UI boundary: any model or
        # decoding failure is surfaced in the JSON panel instead of
        # taking down the app.
        return {
            "error": str(e),
            "status": "Error occurred"
        }
# Create Gradio interface: microphone in, JSON analysis results out.
# NOTE(review): `source=` on gr.Audio was renamed to `sources=["microphone"]`
# in Gradio 4.x; this call only works on Gradio 3.x -- confirm the pinned
# gradio version before upgrading.
interface = gr.Interface(
    fn=analyze_voice,
    inputs=gr.Audio(source="microphone", type="filepath", label="Voice Input"),
    outputs=gr.JSON(label="Analysis Results"),
    title="Digital Gut - Voice Emotion Analysis",
    description="Performs emotion analysis and text conversion from voice input.",
    theme=gr.themes.Soft(),
    # Usage analytics are explicitly enabled (sent to Gradio's servers).
    analytics_enabled=True
)
# Launch app -- standard script guard so the Gradio server starts only when
# this file is run directly, not when the module is imported.
if __name__ == "__main__":
    interface.launch()