Aryarya committed on
Commit
f9fdde4
·
1 Parent(s): c073751
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. answer_cache/cache.db +3 -0
  3. metrics.py +13 -0
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results.duckdb filter=lfs diff=lfs merge=lfs -text
37
  puzzles_cleaned.csv filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results.duckdb filter=lfs diff=lfs merge=lfs -text
37
  puzzles_cleaned.csv filter=lfs diff=lfs merge=lfs -text
38
+ answer_cache/cache.db filter=lfs diff=lfs merge=lfs -text
answer_cache/cache.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e1a6adfdf2de2f7718c1284bf810dc64d191ee415a0975949b01c6954be1b2
3
+ size 196784128
metrics.py CHANGED
@@ -6,6 +6,11 @@ import argparse
6
  import unicodedata
7
  import re
8
 
 
 
 
 
 
9
  def normalize_text(text: str) -> str:
10
  """Normalize text to remove accents, convert to lowercase, and strip spaces."""
11
  text = unicodedata.normalize("NFKD", text) # Decomposes letters with accents (e.g., é → e + ́)
@@ -42,11 +47,16 @@ def _answer_without_thoughts(completion: str) -> str:
42
  return completion
43
 
44
 
 
45
  def _check_answer(completion: str, answer: str) -> bool:
46
  """
47
  Check that all the phrases that must appear in the answer appear in the
48
  completion. We ignore "thoughts", capitalization, and punctuation.
49
  """
 
 
 
 
50
  completion = _answer_without_thoughts(completion).lower()
51
  completion = completion.replace("**","")
52
  completion = re.sub(r'[^\w\s]', ' ', completion) # this replaces punctuations with space, aligning with the _parse_answer function's ' '.join
@@ -56,9 +66,12 @@ def _check_answer(completion: str, answer: str) -> bool:
56
  for answer_phrases in alternative_answers:
57
  # if all(phrase in completion for phrase in answer_phrases):
58
  if all(re.search(rf'\b{re.escape(phrase)}\b', completion) for phrase in answer_phrases):
 
59
  return True
 
60
  return False
61
 
 
62
  def _clip_text(text: str, width: int) -> str:
63
  return text if len(text) <= width else text[:width] + "..."
64
 
 
6
  import unicodedata
7
  import re
8
 
9
+
10
+ import diskcache as dc
11
+
12
+ cache = dc.Cache("answer_cache")
13
+
14
  def normalize_text(text: str) -> str:
15
  """Normalize text to remove accents, convert to lowercase, and strip spaces."""
16
  text = unicodedata.normalize("NFKD", text) # Decomposes letters with accents (e.g., é → e + ́)
 
47
  return completion
48
 
49
 
50
+
51
  def _check_answer(completion: str, answer: str) -> bool:
52
  """
53
  Check that all the phrases that must appear in the answer appear in the
54
  completion. We ignore "thoughts", capitalization, and punctuation.
55
  """
56
+ key = (completion, answer)
57
+ if key in cache:
58
+ return cache[key]
59
+
60
  completion = _answer_without_thoughts(completion).lower()
61
  completion = completion.replace("**","")
62
  completion = re.sub(r'[^\w\s]', ' ', completion) # this replaces punctuations with space, aligning with the _parse_answer function's ' '.join
 
66
  for answer_phrases in alternative_answers:
67
  # if all(phrase in completion for phrase in answer_phrases):
68
  if all(re.search(rf'\b{re.escape(phrase)}\b', completion) for phrase in answer_phrases):
69
+ cache[key] = True
70
  return True
71
+ cache[key] = False
72
  return False
73
 
74
+
75
  def _clip_text(text: str, width: int) -> str:
76
  return text if len(text) <= width else text[:width] + "..."
77