|
import gradio as gr |
|
import numpy as np |
|
import librosa |
|
from transformers import pipeline |
|
|
|
|
|
# Load both HuggingFace pipelines once at import time so every request
# reuses the same in-memory models (first run downloads the checkpoints).
# NOTE(review): this checkpoint classifies spoken *commands* (Speech Commands
# v2), not emotions — confirm it is the intended model for "emotion" labels.
emotion_analyzer = pipeline("audio-classification", model="MIT/ast-finetuned-speech-commands-v2")

# Korean automatic speech recognition (wav2vec2 XLSR fine-tuned on Korean).
speech_recognizer = pipeline("automatic-speech-recognition",

model="kresnik/wav2vec2-large-xlsr-korean")
|
|
|
def analyze_voice(audio_file):
    """Analyze a recorded voice clip: emotion label, transcription, energy.

    Args:
        audio_file: Path to the audio file produced by the Gradio component
            (``type="filepath"``).

    Returns:
        dict: On success, the top emotion label and its probability, the
        transcribed text, the mean RMS energy, and a status message.
        On failure, the error text and an error status.
    """
    try:
        # Both models (AST speech-commands classifier, wav2vec2 XLSR) were
        # trained on 16 kHz audio. librosa's default sr=22050 would hand the
        # pipelines a mis-sampled raw array, so resample explicitly.
        y, sr = librosa.load(audio_file, sr=16000)

        # Audio classification; pipeline results are sorted by score,
        # so index 0 is the most likely label.
        emotions = emotion_analyzer(y)
        primary_emotion = emotions[0]

        # Korean speech-to-text on the same resampled waveform.
        text_result = speech_recognizer(y)

        # Mean root-mean-square energy as a rough loudness indicator.
        # (The original also computed MFCCs here but never used them,
        # so that dead work was removed.)
        energy = np.mean(librosa.feature.rms(y=y))

        return {
            "emotion": primary_emotion['label'],
            "emotion_probability": f"{primary_emotion['score']:.2f}",
            "transcribed_text": text_result['text'],
            "energy_level": f"{energy:.2f}",
            "status": "Analysis complete",
        }
    except Exception as e:
        # UI boundary: surface any failure (bad file, model error) to the
        # user as JSON instead of crashing the app.
        return {
            "error": str(e),
            "status": "Error occurred",
        }
|
|
|
|
|
# Wire the analysis function into a Gradio UI:
# microphone recording in -> JSON dict of results out.
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; Gradio 4.x
# renamed it to `sources=["microphone"]` — confirm the pinned gradio version.
# NOTE(review): analytics_enabled=True opts in to Gradio usage telemetry —
# confirm this is intentional.
interface = gr.Interface(

    fn=analyze_voice,

    inputs=gr.Audio(source="microphone", type="filepath", label="Voice Input"),

    outputs=gr.JSON(label="Analysis Results"),

    title="Digital Gut - Voice Emotion Analysis",

    description="Performs emotion analysis and text conversion from voice input.",

    theme=gr.themes.Soft(),

    analytics_enabled=True

)
|
|
|
|
|
if __name__ == "__main__":

    # Start the Gradio server; blocks until the process is stopped.
    interface.launch()