| import pandas as pd | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
def _join_or_none(labels):
    """Join labels with single spaces, or return the literal 'None' when empty."""
    return ' '.join(labels) if labels else 'None'


def evaluate_emotions_from_csv(
    csv_path,
    model_path,
    output_csv_path,
    emotion_columns,
    device="cuda" if torch.cuda.is_available() else "cpu",
    threshold=0.1
):
    """Compare a classifier's predicted emotions with the labels in a CSV file.

    Each row's ``text`` is tokenized and passed through the sequence
    classification model loaded from ``model_path``. A sigmoid is applied to
    the logits and every label whose probability is strictly greater than
    ``threshold`` counts as predicted. A per-row report is written to
    ``output_csv_path`` and a confirmation message is printed.

    Args:
        csv_path: Input CSV; it must contain a ``text`` column and an
            ``emotions`` column holding whitespace-separated label names.
        model_path: Location passed to ``from_pretrained`` for both the
            tokenizer and the model.
        output_csv_path: Destination of the report CSV.
        emotion_columns: Label names, paired positionally with the model's
            output logits.
        device: Torch device the model and inputs are moved to.
        threshold: Probability a label must exceed to be predicted.

    Raises:
        ValueError: If ``emotion_columns`` does not have exactly one name per
            model output label.
    """
    emotion_columns = list(emotion_columns)

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # zip() would silently drop the surplus on either side, attaching wrong
    # names to probabilities or ignoring labels; fail loudly instead.
    num_labels = model.config.num_labels
    if num_labels != len(emotion_columns):
        raise ValueError(
            f"emotion_columns has {len(emotion_columns)} names but the model "
            f"outputs {num_labels} labels"
        )

    model.to(device)
    model.eval()

    data = pd.read_csv(csv_path)
    # Missing cells become empty strings and numeric-looking cells become text,
    # so neither the tokenizer nor str.split() fails on a non-string value.
    texts = data['text'].fillna('').astype(str).tolist()
    label_cells = data['emotions'].fillna('').astype(str).tolist()

    true_emotions_list = []
    predicted_emotions_list = []
    correctly_identified = []
    incorrectly_identified = []
    undefined = []

    for text, label_cell in zip(texts, label_cells):
        # split() without an argument ignores repeated/leading whitespace, and
        # dict.fromkeys() removes duplicates while keeping first-seen order.
        true_emotions = list(dict.fromkeys(label_cell.split()))
        true_lookup = set(true_emotions)

        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=128,
        ).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Index the batch dimension explicitly: a bare squeeze() would also
        # collapse the label dimension when the model has a single label.
        probabilities = torch.sigmoid(logits)[0].cpu().numpy()

        # Lists in a fixed order (not sets) keep the report reproducible
        # across runs regardless of string hash randomisation.
        predicted_emotions = [
            emotion
            for emotion, prob in zip(emotion_columns, probabilities)
            if prob > threshold
        ]
        predicted_lookup = set(predicted_emotions)

        correct = [e for e in true_emotions if e in predicted_lookup]
        incorrect = [e for e in predicted_emotions if e not in true_lookup]
        undefined_emotions = [e for e in true_emotions if e not in predicted_lookup]

        true_emotions_list.append(' '.join(true_emotions))
        predicted_emotions_list.append(' '.join(predicted_emotions))
        correctly_identified.append(_join_or_none(correct))
        incorrectly_identified.append(_join_or_none(incorrect))
        undefined.append(_join_or_none(undefined_emotions))

    results_df = pd.DataFrame({
        "text": data['text'],
        "true emotions": true_emotions_list,
        "predict emotions": predicted_emotions_list,
        "correctly Identified": correctly_identified,
        "incorrectly Identified": incorrectly_identified,
        "undefined": undefined,
    })
    results_df.to_csv(output_csv_path, index=False)
    print(f"Результаты сохранены в {output_csv_path}")
# Script configuration: model checkpoint, labelled input CSV and report destination.
model_path = "RuBert-tiny2-EmotionsDetected"
csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/testEmotionDetected.csv"
output_csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/Emotions_detected.csv"

# Label names handed to the evaluator; the order of this list is significant
# because the evaluator pairs it positionally with the model outputs.
emotion_columns = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]

# Run the evaluation with a probability cut-off of 0.2 instead of the default.
evaluate_emotions_from_csv(
    csv_path=csv_path,
    model_path=model_path,
    output_csv_path=output_csv_path,
    emotion_columns=emotion_columns,
    threshold=0.2,
)