File size: 1,143 Bytes
a063388 ea8ce67 3d3b2cd ea8ce67 3d3b2cd 3b277e3 3d3b2cd ea8ce67 3d3b2cd a063388 3d3b2cd a063388 3b277e3 a063388 3b277e3 ea8ce67 a063388 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# emotion_detector.py
import os
import tempfile

# Point the Hugging Face caches at a writable temp directory.
# This must happen BEFORE SpeechBrain is imported: libraries that resolve
# their cache location from these variables when they are first imported
# would otherwise miss the override.
os.environ["HF_HOME"] = os.path.join(tempfile.gettempdir(), "hf_cache")
# Also set the more specific cache variables explicitly (optional but helps).
os.environ["HF_DATASETS_CACHE"] = os.environ["HF_HOME"]
os.environ["TRANSFORMERS_CACHE"] = os.environ["HF_HOME"]
os.environ["HUGGINGFACE_HUB_CACHE"] = os.environ["HF_HOME"]

# Third-party imports intentionally follow the environment setup above.
import torchaudio  # noqa: E402
from speechbrain.inference.classifiers import AudioClassifier  # noqa: E402

# Use a temp directory for SpeechBrain model saving.
model_dir = os.path.join(tempfile.gettempdir(), "emotion_model")

# Load the pretrained model once, at import time, so every call to the
# detection function reuses the same classifier instance.
classifier = AudioClassifier.from_hparams(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    savedir=model_dir,
)
# Emoji shown for each emotion label (keys are lower-case label names).
# Written as \N{...} escapes so the glyphs survive any re-encoding of this
# file; the previous literals had been corrupted into mis-decoded bytes.
# NOTE(review): the "happy" and "neutral" glyphs could not be recovered
# exactly from the corrupted text -- confirm these are the intended ones.
EMOTION_EMOJIS = {
    "angry": "\N{ANGRY FACE}",
    "happy": "\N{SMILING FACE WITH SMILING EYES}",
    "neutral": "\N{NEUTRAL FACE}",
    "sad": "\N{CRYING FACE}",
    "fearful": "\N{FEARFUL FACE}",
}
# Maps abbreviated class labels to the full names used as EMOTION_EMOJIS keys.
# NOTE(review): the IEMOCAP model is understood to emit these short labels;
# confirm against the model's label encoder.
_EMOTION_LABEL_ALIASES = {
    "ang": "angry",
    "hap": "happy",
    "neu": "neutral",
    "sad": "sad",
}


def detect_emotion(audio_path):
    """Classify the emotion in an audio file and pick a matching emoji.

    Args:
        audio_path: Path of the audio file, handed to
            ``classifier.classify_file``.

    Returns:
        A ``(emotion, emoji)`` tuple: the predicted label as a string, and
        the emoji found for it in ``EMOTION_EMOJIS`` (a question-mark emoji
        when the label has no entry).
    """
    # The file is not pre-loaded with torchaudio any more: the decoded signal
    # was never used, and classify_file reads the audio itself.
    prediction = classifier.classify_file(audio_path)

    # The fourth element of the prediction is the predicted text label.
    # SpeechBrain classifiers return it as a list (one entry per batch item),
    # so unwrap it before treating it as a string.
    label = prediction[3]
    if isinstance(label, (list, tuple)):
        label = label[0] if label else ""
    emotion = str(label)

    # Normalise for lookup only; the label itself is returned unchanged.
    key = emotion.strip().lower()
    key = _EMOTION_LABEL_ALIASES.get(key, key)
    emoji = EMOTION_EMOJIS.get(key, "\N{BLACK QUESTION MARK ORNAMENT}")
    return emotion, emoji
|