import torch
from transformers import (
    WhisperProcessor,
    WhisperForConditionalGeneration,
    AutoModelForAudioClassification,
    AutoFeatureExtractor,
)
from fer import FER


def load_models():
    """
    Loads all the machine learning models and returns them as a dictionary.
    """
    # Whisper model for transcription
    whisper_model_name = "openai/whisper-base"
    whisper_processor = WhisperProcessor.from_pretrained(whisper_model_name)
    whisper_model = WhisperForConditionalGeneration.from_pretrained(whisper_model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    whisper_model = whisper_model.to(device)

    # Speech emotion recognition model
    emotion_model_id = "firdhokk/speech-emotion-recognition-with-openai-whisper-large-v3"
    emotion_model = AutoModelForAudioClassification.from_pretrained(emotion_model_id)
    emotion_feature_extractor = AutoFeatureExtractor.from_pretrained(emotion_model_id, do_normalize=True)
    emotion_id2label = emotion_model.config.id2label

    # Facial emotion recognition model (MTCNN face detector)
    fer_detector = FER(mtcnn=True)

    return {
        "whisper": {
            "processor": whisper_processor,
            "model": whisper_model,
            "device": device,
        },
        "emotion_model": emotion_model,
        "emotion_feature_extractor": emotion_feature_extractor,
        "emotion_id2label": emotion_id2label,
        "fer": fer_detector,
    }
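

# --- Usage sketch (illustrative, not part of the original API) ---
# A minimal example of how the returned dictionary might be consumed for
# transcription with the Whisper entry. The audio file path "sample.wav"
# and the librosa dependency are assumptions for this sketch only.
if __name__ == "__main__":
    import librosa

    models = load_models()

    # Load mono audio at 16 kHz, the sampling rate Whisper expects.
    audio, sr = librosa.load("sample.wav", sr=16000)

    whisper = models["whisper"]
    inputs = whisper["processor"](audio, sampling_rate=sr, return_tensors="pt")
    input_features = inputs.input_features.to(whisper["device"])

    # Generate token IDs and decode them back to text.
    with torch.no_grad():
        predicted_ids = whisper["model"].generate(input_features)
    transcript = whisper["processor"].batch_decode(predicted_ids, skip_special_tokens=True)[0]
    print(transcript)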