Joschka Strueber committed on
Commit
b16e2d1
·
1 Parent(s): 402b600

[Fix] alpha choices

Browse files
Files changed (2) hide show
  1. src/dataloading.py +3 -3
  2. src/utils.py +6 -1
src/dataloading.py CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import HfApi
4
 
5
  from functools import lru_cache
6
 
7
- from src.utils import opt_in_pars_to_index, get_test_target
8
 
9
  def get_leaderboard_models_reload():
10
  api = HfApi()
@@ -104,8 +104,8 @@ def filter_labels(dataset_name, doc):
104
  test_target, target_key = get_test_target(doc[0])
105
  if "answer_index" in doc[0].keys():
106
  labels = [d["answer_index"] for d in doc]
107
- elif test_target.startswith("("):
108
- labels = [opt_in_pars_to_index(d[target_key]) for d in doc]
109
  elif dataset_name in ["bbh_boolean_expressions"]:
110
  for d in doc:
111
  if d[target_key] == "True":
 
4
 
5
  from functools import lru_cache
6
 
7
+ from src.utils import opt_to_index, get_test_target
8
 
9
  def get_leaderboard_models_reload():
10
  api = HfApi()
 
104
  test_target, target_key = get_test_target(doc[0])
105
  if "answer_index" in doc[0].keys():
106
  labels = [d["answer_index"] for d in doc]
107
+ elif test_target.startswith("(") or test_target.isalpha():
108
+ labels = [opt_to_index(d[target_key]) for d in doc]
109
  elif dataset_name in ["bbh_boolean_expressions"]:
110
  for d in doc:
111
  if d[target_key] == "True":
src/utils.py CHANGED
@@ -10,12 +10,17 @@ def one_hot(probs: np.array) -> np.array:
10
  one_hot[np.argmax(probs)] = 1
11
  return one_hot
12
 
13
- def opt_in_pars_to_index(s):
14
  if s.startswith("(") and s.endswith(")"):
15
  letter = s[1] # Extract the letter inside the parentheses
16
  return ord(letter) - ord("A") # Convert to zero-based index
 
 
17
  else:
18
  raise ValueError("Invalid format")
 
 
 
19
 
20
  def get_test_target(doc):
21
  if "target" in doc:
 
10
  one_hot[np.argmax(probs)] = 1
11
  return one_hot
12
 
13
def opt_to_index(s):
    """Convert a multiple-choice option label to a zero-based index.

    Two label formats are accepted:

    * a letter wrapped in parentheses, e.g. ``"(A)"`` -> 0, ``"(c)"`` -> 2
    * a bare single letter, e.g. ``"B"`` -> 1, ``"d"`` -> 3

    Both formats are case-insensitive and only ASCII letters ``A``-``Z`` /
    ``a``-``z`` are valid option letters.

    Args:
        s: The option label string.

    Returns:
        The zero-based index of the option letter (``A`` -> 0, ``B`` -> 1, ...).

    Raises:
        ValueError: If ``s`` matches neither format, or the extracted
            character is not an ASCII letter (e.g. ``"()"``, ``"(1)"``).
    """
    if s.startswith("(") and s.endswith(")"):
        # Extract the character right after the opening parenthesis. For "()"
        # this is the closing parenthesis, which the letter check rejects.
        letter = s[1]
    elif is_single_letter(s):
        letter = s
    else:
        raise ValueError("Invalid format")

    # Restrict to ASCII letters *before* calling upper(): some non-ASCII
    # alphabetic characters change length when upper-cased (e.g. "ß" -> "SS"),
    # which would make ord() raise TypeError instead of the documented
    # ValueError, and others would map to meaningless indices.
    if not ("A" <= letter <= "Z" or "a" <= letter <= "z"):
        raise ValueError("Invalid format")

    return ord(letter.upper()) - ord("A")  # Convert to zero-based index


def is_single_letter(s):
    """Return True if ``s`` is exactly one alphabetic character."""
    return len(s) == 1 and s.isalpha()
24
 
25
  def get_test_target(doc):
26
  if "target" in doc: