File size: 2,977 Bytes
c7c2507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def _parse_emotions(raw):
    """Split a whitespace-separated label cell into unique labels, keeping order."""
    # Empty CSV cells are read by pandas as NaN; treat them as "no labels".
    if pd.isna(raw):
        return []
    # split() without arguments ignores repeated/leading/trailing whitespace,
    # and dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(str(raw).split()))


def _join_or_none(labels):
    """Join labels with spaces, or return the literal 'None' when there are none."""
    return ' '.join(labels) if labels else 'None'


def evaluate_emotions_from_csv(
    csv_path,
    model_path,
    output_csv_path,
    emotion_columns,
    device="cuda" if torch.cuda.is_available() else "cpu",
    threshold=0.1,
):
    """Run a sequence-classification model over a CSV and save a per-row comparison.

    The input CSV must contain a ``text`` column and an ``emotions`` column
    holding whitespace-separated label names. For every row the text is
    tokenized (truncated/padded to 128 tokens), passed through the model, and
    each output logit is mapped through a sigmoid. A label is predicted when
    its probability is strictly greater than ``threshold``.

    The result is written to ``output_csv_path`` with the columns ``text``,
    ``true emotions``, ``predict emotions``, ``correctly Identified``
    (true and predicted), ``incorrectly Identified`` (predicted but not true)
    and ``undefined`` (true but not predicted). The last three columns hold
    the string ``'None'`` when empty. Labels are emitted in a deterministic
    order: CSV order for true labels, ``emotion_columns`` order for predictions.

    Args:
        csv_path: Path of the input CSV file.
        model_path: Path or identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.
        output_csv_path: Path of the CSV file to write.
        emotion_columns: Label names, paired positionally with the model's
            output probabilities.
        device: Device the model and inputs are moved to.
        threshold: Probability cut-off above which a label counts as predicted.

    Returns:
        None. The results are written to ``output_csv_path``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(device)
    model.eval()

    data = pd.read_csv(csv_path)

    true_emotions_list = []
    predicted_emotions_list = []
    correctly_identified = []
    incorrectly_identified = []
    undefined = []

    for raw_text, raw_labels in zip(data['text'], data['emotions']):
        # The tokenizer only accepts strings; missing cells become empty text.
        text = '' if pd.isna(raw_text) else str(raw_text)
        true_emotions = _parse_emotions(raw_labels)

        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=128,
        ).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Index the batch dimension instead of squeeze() so a single-label
        # model still yields a 1-D array that zip() can iterate.
        probabilities = torch.sigmoid(logits)[0].cpu().numpy()

        predicted_emotions = list(dict.fromkeys(
            emotion
            for emotion, prob in zip(emotion_columns, probabilities)
            if prob > threshold
        ))

        true_lookup = set(true_emotions)
        predicted_lookup = set(predicted_emotions)
        correct = [e for e in predicted_emotions if e in true_lookup]
        incorrect = [e for e in predicted_emotions if e not in true_lookup]
        missed = [e for e in true_emotions if e not in predicted_lookup]

        true_emotions_list.append(' '.join(true_emotions))
        predicted_emotions_list.append(' '.join(predicted_emotions))
        correctly_identified.append(_join_or_none(correct))
        incorrectly_identified.append(_join_or_none(incorrect))
        undefined.append(_join_or_none(missed))

    results_df = pd.DataFrame({
        "text": data['text'],
        "true emotions": true_emotions_list,
        "predict emotions": predicted_emotions_list,
        "correctly Identified": correctly_identified,
        "incorrectly Identified": incorrectly_identified,
        "undefined": undefined,
    })

    results_df.to_csv(output_csv_path, index=False)
    print(f"Результаты сохранены в {output_csv_path}")

# Script configuration: input CSV, model directory and output CSV locations.
# NOTE(review): "Dstasets" looks like a misspelling of "Datasets", but it is a
# runtime path and is left unchanged — confirm against the actual directory.
csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/testEmotionDetected.csv"
model_path = "RuBert-tiny2-EmotionsDetected"
output_csv_path = "RuBert-tiny2-EmotionsDetected/Dstasets/Emotions_detected.csv"
# Label names; evaluate_emotions_from_csv pairs them positionally with the
# model's output probabilities, so the order here matters.
emotion_columns = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire",
    "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy",
    "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"
]

# Run the evaluation only when executed as a script, so importing this module
# to reuse the function does not load the model or write files.
if __name__ == "__main__":
    evaluate_emotions_from_csv(csv_path, model_path, output_csv_path, emotion_columns, threshold=0.2)