|
import pandas as pd
|
|
import torch
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
|
def _parse_emotion_labels(raw):
    """Return the unique labels found in *raw*, in order of first appearance.

    Empty CSV cells are read by pandas as NaN (a float), so a missing value is
    treated as "no labels" instead of failing on ``.split``. Splitting on
    arbitrary whitespace also avoids producing empty-string labels when a cell
    contains repeated or trailing spaces.
    """
    if not isinstance(raw, str):
        if pd.isna(raw):
            return []
        raw = str(raw)
    return list(dict.fromkeys(raw.split()))


def _join_or_none(labels):
    """Join *labels* with single spaces, or return the literal 'None' if empty."""
    return ' '.join(labels) if labels else 'None'


def evaluate_emotions_from_csv(
    csv_path,
    model_path,
    output_csv_path,
    emotion_columns,
    device="cuda" if torch.cuda.is_available() else "cpu",
    threshold=0.1
):
    """Run a multi-label emotion classifier over a CSV and write a comparison report.

    The tokenizer and model are loaded from ``model_path``. Every row of the
    input CSV is classified on its own: the logits are passed through a
    sigmoid and each label whose probability is strictly greater than
    ``threshold`` counts as predicted. Predictions are compared with the
    labels stored in the CSV and the per-row result is written to
    ``output_csv_path``; a confirmation message is printed afterwards.

    Args:
        csv_path: Input CSV. It must contain a ``text`` column and an
            ``emotions`` column holding whitespace-separated label names.
        model_path: Location handed to ``from_pretrained`` for both the
            tokenizer and the sequence-classification model.
        output_csv_path: Destination of the report CSV (written without the
            index column).
        emotion_columns: Label names, matched positionally against the model's
            output scores. If the lengths differ, ``zip`` silently stops at
            the shorter of the two.
        device: Device the model and inputs are moved to. The default is
            computed once, when the function is defined.
        threshold: Probability a label must exceed to be reported.

    Returns:
        None. The report columns are ``text``, ``true emotions``,
        ``predict emotions``, ``correctly Identified`` (true and predicted),
        ``incorrectly Identified`` (predicted but not true) and ``undefined``
        (true but not predicted). The last three hold the string ``'None'``
        when empty.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(device)
    model.eval()

    data = pd.read_csv(csv_path)

    true_emotions_list = []
    predicted_emotions_list = []
    correctly_identified = []
    incorrectly_identified = []
    undefined = []

    # Inference only: no gradients are needed for any row.
    with torch.no_grad():
        for text, raw_labels in zip(data['text'], data['emotions']):
            # A blank text cell arrives as NaN; the tokenizer only accepts str.
            text = "" if pd.isna(text) else str(text)
            true_emotions = _parse_emotion_labels(raw_labels)

            inputs = tokenizer(
                text,
                return_tensors="pt",
                padding="max_length",
                truncation=True,
                max_length=128,
            ).to(device)
            logits = model(**inputs).logits

            # Logits are expected to be (1, num_labels) for a single text.
            # Drop only the batch axis: a bare squeeze() would also collapse
            # the label axis of a one-label model into a 0-d, non-iterable value.
            probabilities = torch.sigmoid(logits).squeeze(0).cpu().numpy()

            # Lists (not sets) keep the output order stable between runs:
            # predictions follow emotion_columns, true labels follow the CSV.
            predicted_emotions = list(dict.fromkeys(
                emotion
                for emotion, prob in zip(emotion_columns, probabilities)
                if prob > threshold
            ))

            true_lookup = set(true_emotions)
            predicted_lookup = set(predicted_emotions)
            correct = [e for e in true_emotions if e in predicted_lookup]
            incorrect = [e for e in predicted_emotions if e not in true_lookup]
            missed = [e for e in true_emotions if e not in predicted_lookup]

            true_emotions_list.append(' '.join(true_emotions))
            predicted_emotions_list.append(' '.join(predicted_emotions))
            correctly_identified.append(_join_or_none(correct))
            incorrectly_identified.append(_join_or_none(incorrect))
            undefined.append(_join_or_none(missed))

    results_df = pd.DataFrame({
        "text": data['text'],
        "true emotions": true_emotions_list,
        "predict emotions": predicted_emotions_list,
        "correctly Identified": correctly_identified,
        "incorrectly Identified": incorrectly_identified,
        "undefined": undefined,
    })

    results_df.to_csv(output_csv_path, index=False)
    print(f"Результаты сохранены в {output_csv_path}")
|
|
|
|
# Label names handed to evaluate_emotions_from_csv, which pairs them
# positionally with the model's per-label scores.
emotion_columns = [
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral",
]

# Input CSV, model location and report destination for this run.
model_path = "RuBert-tiny2-EmotionsDetected"
csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/testEmotionDetected.csv"
output_csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/Emotions_detected.csv"

# Run the evaluation with a 0.2 probability cut-off instead of the 0.1 default.
evaluate_emotions_from_csv(
    csv_path=csv_path,
    model_path=model_path,
    output_csv_path=output_csv_path,
    emotion_columns=emotion_columns,
    threshold=0.2,
)
|
|
|