openfree committed on
Commit
b64098a
·
verified ·
1 Parent(s): ef55ded

Update src/leaderboard/read_evals.py

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +15 -3
src/leaderboard/read_evals.py CHANGED
@@ -31,6 +31,7 @@ class EvalResult:
31
  num_params: int = 0
32
  date: str = "" # submission date of request file
33
  still_on_hub: bool = False
 
34
 
35
  @classmethod
36
  def init_from_json_file(self, json_filepath):
@@ -57,6 +58,12 @@ class EvalResult:
57
  result_key = f"{org}_{model}_{precision.value.name}"
58
  full_model = "/".join(org_and_model)
59
 
 
 
 
 
 
 
60
  still_on_hub, _, model_config = is_model_on_hub(
61
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
62
  )
@@ -88,7 +95,8 @@ class EvalResult:
88
  precision=precision,
89
  revision= config.get("model_sha", ""),
90
  still_on_hub=still_on_hub,
91
- architecture=architecture
 
92
  )
93
 
94
  def update_with_request_file(self, requests_path):
@@ -110,6 +118,10 @@ class EvalResult:
110
  def to_dict(self):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
 
 
 
 
113
  data_dict = {
114
  "eval_name": self.eval_name, # not a column, just a save name,
115
  AutoEvalColumn.precision.name: self.precision.value.name,
@@ -117,7 +129,7 @@ class EvalResult:
117
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
118
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
119
  AutoEvalColumn.architecture.name: self.architecture,
120
- AutoEvalColumn.model.name: make_clickable_model(self.full_model),
121
  AutoEvalColumn.revision.name: self.revision,
122
  AutoEvalColumn.average.name: average,
123
  AutoEvalColumn.license.name: self.license,
@@ -193,4 +205,4 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
193
  except KeyError: # not all eval values present
194
  continue
195
 
196
- return results
 
31
  num_params: int = 0
32
  date: str = "" # submission date of request file
33
  still_on_hub: bool = False
34
+ display_model: str = "" # 새로 추가: 표시용 모델명
35
 
36
  @classmethod
37
  def init_from_json_file(self, json_filepath):
 
58
  result_key = f"{org}_{model}_{precision.value.name}"
59
  full_model = "/".join(org_and_model)
60
 
61
+ # 특정 모델명에 대한 맵핑 처리 추가
62
+ display_model = full_model
63
+ if full_model == "demo-leaderboard/gpt2-demo":
64
+ display_model = "deepseek-ai/DeepSeek-R1"
65
+ print(f"모델명 맵핑 적용: {full_model} -> {display_model}")
66
+
67
  still_on_hub, _, model_config = is_model_on_hub(
68
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
69
  )
 
95
  precision=precision,
96
  revision= config.get("model_sha", ""),
97
  still_on_hub=still_on_hub,
98
+ architecture=architecture,
99
+ display_model=display_model # 새로 추가한 필드 설정
100
  )
101
 
102
  def update_with_request_file(self, requests_path):
 
118
  def to_dict(self):
119
  """Converts the Eval Result to a dict compatible with our dataframe display"""
120
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
121
+
122
+ # 표시용 모델명 사용
123
+ model_to_display = self.display_model if self.display_model else self.full_model
124
+
125
  data_dict = {
126
  "eval_name": self.eval_name, # not a column, just a save name,
127
  AutoEvalColumn.precision.name: self.precision.value.name,
 
129
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
130
  AutoEvalColumn.weight_type.name: self.weight_type.value.name,
131
  AutoEvalColumn.architecture.name: self.architecture,
132
+ AutoEvalColumn.model.name: make_clickable_model(model_to_display), # 수정된 부분
133
  AutoEvalColumn.revision.name: self.revision,
134
  AutoEvalColumn.average.name: average,
135
  AutoEvalColumn.license.name: self.license,
 
205
  except KeyError: # not all eval values present
206
  continue
207
 
208
+ return results