Spaces:

macrocosm-os
/

finetuning-leaderboard

Runtime error

App Files Files Community

rusticluftig committed on Sep 9, 2024

Commit

8e90a67

1 Parent(s): 877e6fb

Fix how "stale" is computed when there are multiple competitions

Browse files

Files changed (1) hide show

utils.py +13 -7

utils.py CHANGED Viewed

@@ -6,7 +6,6 @@ import math
 import os
 import time
 import traceback
-from collections import defaultdict
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple
@@ -18,8 +17,6 @@ from bittensor.extrinsics.serving import get_metadata
 from dotenv import load_dotenv
 from wandb.apis.public.history import HistoryScan
-import competitions
 NETUID = 37
 DELAY_SECS = 3
 RETRIES = 3
@@ -181,8 +178,15 @@ def get_scores(
         uids (List[int]): List of UIDs to get scores for.
         wandb_runs (List): List of validator runs from Wandb. Requires the runs are provided in descending order.
     """
     result = {}
     previous_timestamp = None
     # Iterate through the runs until we've processed all the uids.
     for i, run in enumerate(wandb_runs):
         if not "original_format_json" in run.summary:
@@ -196,21 +200,23 @@ def get_scores(
         ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
         previous_timestamp = timestamp
         for uid in uids:
             if uid in result:
                 continue
             if str(uid) in all_uid_data:
                 uid_data = all_uid_data[str(uid)]
-                # Only the most recent run is fresh.
-                is_fresh = i == 0
                 result[uid] = {
-                    "avg_loss": uid_data.get("average_loss", None),
                     "win_rate": uid_data.get("win_rate", None),
                     "win_total": uid_data.get("win_total", None),
                     "weight": uid_data.get("weight", None),
                     "competition_id": uid_data.get("competition_id", None),
                     "fresh": is_fresh,
                 }
         if len(result) == len(uids):
             break
     return result
@@ -266,7 +272,7 @@ def get_losses_over_time(wandb_runs: List, competition_id: int) -> pd.DataFrame:
                     continue
                 if loss < best_loss:
-                    best_loss = uid_data["average_loss"]
                     should_add_datapoint = True
         # Now that we've processed the run's most recent steps, check if we should add a datapoint.
         if should_add_datapoint:

 import os
 import time
 import traceback
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple
 from dotenv import load_dotenv
 from wandb.apis.public.history import HistoryScan
 NETUID = 37
 DELAY_SECS = 3
 RETRIES = 3
         uids (List[int]): List of UIDs to get scores for.
         wandb_runs (List): List of validator runs from Wandb. Requires the runs are provided in descending order.
     """
+    def _maybe_convert_loss(loss: float, comp_id: int) -> float:
+        """Converts loss to score for competitions that require it."""
+        if comp_id == 2:
+            return 1 - loss if loss else None
+        return loss
     result = {}
     previous_timestamp = None
+    seen_competitions = set()
     # Iterate through the runs until we've processed all the uids.
     for i, run in enumerate(wandb_runs):
         if not "original_format_json" in run.summary:
         ), f"Timestamps are not in descending order: {timestamp} >= {previous_timestamp}"
         previous_timestamp = timestamp
+        comp_id = data.get("competition_id", None)
         for uid in uids:
             if uid in result:
                 continue
             if str(uid) in all_uid_data:
                 uid_data = all_uid_data[str(uid)]
+                # Only the most recent run per competition is fresh.
+                is_fresh = comp_id not in seen_competitions
                 result[uid] = {
+                    "avg_loss": _maybe_convert_loss(uid_data.get("average_loss", None), comp_id),
                     "win_rate": uid_data.get("win_rate", None),
                     "win_total": uid_data.get("win_total", None),
                     "weight": uid_data.get("weight", None),
                     "competition_id": uid_data.get("competition_id", None),
                     "fresh": is_fresh,
                 }
+        seen_competitions.add(comp_id)
         if len(result) == len(uids):
             break
     return result
                     continue
                 if loss < best_loss:
+                    best_loss = loss
                     should_add_datapoint = True
         # Now that we've processed the run's most recent steps, check if we should add a datapoint.
         if should_add_datapoint: