from fast_sentence_tokenize import tokenize_text


def evaluate_response(original_tokenized: str, response: str) -> float:
    """
    - Tokenize the response string with fast_sentence_tokenize
    - Create a list of original tokens from the whitespace-tokenized original string
    - Assign every original token a rank:
        - Only look at the last mention of a token in the response
        - Rank the tokens by how early they appear in the response (last mention only)
    - Calculate ranking accuracy as a normalized Kendall's tau

    Returns a value between 0 and 1, or 0 if any original token is missing
    from the response.
    """
    original_tokenized = original_tokenized.strip().lower()
    response = response.strip().lower()

    # Tokenize the response string
    response_tokens = tokenize_text(response)
    
    # Create a list of original tokens
    original_tokens = original_tokenized.split()

    # If any original token is missing from the response, return 0 immediately
    for token in original_tokens:
        if token not in response_tokens:
            return 0.0

    # Rank original tokens by their position (a repeated token keeps its last position)
    original_token_ranks = {}
    for i, token in enumerate(original_tokens):
        original_token_ranks[token] = i

    # Rank response tokens by the index of the last occurrence of each token in the response
    response_token_ranks = {}
    for token in original_tokens:
        response_token_ranks[token] = len(response_tokens) - 1 - response_tokens[::-1].index(token)

    # Compress the response token ranks to consecutive integers (0, 1, 2, ...)
    sorted_ranks = sorted(set(response_token_ranks.values()))
    rank_mapping = {old_rank: new_rank for new_rank, old_rank in enumerate(sorted_ranks)}
    for token, rank in response_token_ranks.items():
        response_token_ranks[token] = rank_mapping[rank]

    # Calculate Kendall's tau between the original and response rankings
    n = len(original_tokens)
    total_pairs = n * (n - 1) // 2
    if total_pairs == 0:
        # Fewer than two tokens: no pairs to compare, treat the order as preserved
        return 1.0

    concordant_pairs = 0
    discordant_pairs = 0

    for i in range(n):
        for j in range(i + 1, n):
            original_diff = original_token_ranks[original_tokens[i]] - original_token_ranks[original_tokens[j]]
            response_diff = response_token_ranks[original_tokens[i]] - response_token_ranks[original_tokens[j]]

            if original_diff * response_diff > 0:
                concordant_pairs += 1
            elif original_diff * response_diff < 0:
                discordant_pairs += 1

    kendall_tau = (concordant_pairs - discordant_pairs) / total_pairs
    
    # Normalize Kendall's tau to be between 0 and 1
    normalized_kendall_tau = (kendall_tau + 1) / 2
    
    return normalized_kendall_tau
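

if __name__ == "__main__":
    # Minimal usage sketch (illustrative values, not part of the original file).
    # Assumes `original_tokenized` is already whitespace-tokenized and that the
    # response mentions every original token; the exact score also depends on
    # how fast_sentence_tokenize splits the response text.
    original = "the quick brown fox"
    reply = "first the quick one and then a brown fox appeared"
    print(f"ranking accuracy: {evaluate_response(original, reply):.3f}")
    # Preserved order should score 1.0; a fully reversed order would score 0.0.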