from fast_sentence_tokenize import tokenize_text


def evaluate_response(original_tokenized: str, response: str) -> float:
    """
    - Tokenize the response string using fast_sentence_tokenize
    - Create a list of response tokens
    - Assign every original token a rank:
      - Only look at the last mention of a token in the response
      - Rank the tokens by how early they appear in the response (last mention only)
    - Calculate ranking accuracy as a normalized Kendall's tau

    Returns a value between 0 and 1.
    """
    original_tokenized = original_tokenized.strip().lower()
    response = response.strip().lower()
    # Tokenize the response string with fast_sentence_tokenize
    response_tokens = tokenize_text(response)
    # Create a list of original tokens (the input is already space-separated)
    original_tokens = original_tokenized.split()
    # If any original token is missing from the response, return 0 immediately
    for token in original_tokens:
        if token not in response_tokens:
            return 0.0
    # Create ranks for original tokens (a duplicated token keeps its last index)
    original_token_ranks = {}
    for i, token in enumerate(original_tokens):
        original_token_ranks[token] = i
    # Create ranks for response tokens
    response_token_ranks = {}
    for token in original_tokens:
        # Assign the index of the last occurrence of the token in the response
        response_token_ranks[token] = len(response_tokens) - 1 - response_tokens[::-1].index(token)
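    # Worked example (illustrative values only): raw last-occurrence indices
    # such as {"a": 2, "b": 7, "c": 4} are remapped below to consecutive
    # ranks {"a": 0, "b": 2, "c": 1}, so gaps left by non-original tokens
    # in the response do not distort the pairwise comparison.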
    # Normalize the response token ranks to consecutive integers starting at 0
    sorted_ranks = sorted(set(response_token_ranks.values()))
    rank_mapping = {old_rank: new_rank for new_rank, old_rank in enumerate(sorted_ranks)}
    for token, rank in response_token_ranks.items():
        response_token_ranks[token] = rank_mapping[rank]
    # Calculate Kendall's tau over all pairs of original tokens
    n = len(original_tokens)
    concordant_pairs = 0
    discordant_pairs = 0
    for i in range(n):
        for j in range(i + 1, n):
            original_diff = original_token_ranks[original_tokens[i]] - original_token_ranks[original_tokens[j]]
            response_diff = response_token_ranks[original_tokens[i]] - response_token_ranks[original_tokens[j]]
            if original_diff * response_diff > 0:
                concordant_pairs += 1
            elif original_diff * response_diff < 0:
                discordant_pairs += 1
    total_pairs = n * (n - 1) // 2
    if total_pairs == 0:
        # A single original token has no pairs to compare; it is present in the
        # response, so treat its ordering as perfect rather than dividing by zero.
        return 1.0
    kendall_tau = (concordant_pairs - discordant_pairs) / total_pairs
    # Normalize Kendall's tau from [-1, 1] to [0, 1]
    normalized_kendall_tau = (kendall_tau + 1) / 2
    return normalized_kendall_tau
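
# Minimal usage sketch (hypothetical strings, not taken from the original
# file). Assumes tokenize_text splits whitespace-separated words in the
# obvious way: a perfectly ordered response scores 1.0, a fully reversed
# one scores 0.0, and a response missing a token scores 0 immediately.
if __name__ == "__main__":
    reference = "alpha beta gamma delta"
    print(evaluate_response(reference, "alpha beta gamma delta"))  # 1.0
    print(evaluate_response(reference, "delta gamma beta alpha"))  # 0.0
    print(evaluate_response(reference, "alpha beta delta"))        # 0.0 ("gamma" missing)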