Clémentine
committed on
Commit
·
1d6da9d
1
Parent(s):
6902167
force cleanup on failure
Browse files
src/backend/run_eval_suite_lighteval.py
CHANGED
|
@@ -3,9 +3,9 @@ import os
|
|
| 3 |
import logging
|
| 4 |
from datetime import datetime
|
| 5 |
|
| 6 |
-
from lighteval.main_accelerate import main
|
| 7 |
|
| 8 |
-
from src.envs import RESULTS_REPO, CACHE_PATH
|
| 9 |
from src.backend.manage_requests import EvalRequest
|
| 10 |
|
| 11 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
@@ -14,32 +14,42 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
|
|
| 14 |
if limit:
|
| 15 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
return results
|
|
|
|
| 3 |
import logging
|
| 4 |
from datetime import datetime
|
| 5 |
|
| 6 |
+
from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
|
| 7 |
|
| 8 |
+
from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN
|
| 9 |
from src.backend.manage_requests import EvalRequest
|
| 10 |
|
| 11 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
|
|
def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int, local_dir: str,
                   accelerator: str, region: str, vendor: str, instance_size: str, instance_type: str,
                   limit: int = None):
    """Run a lighteval evaluation for one queued request and return its results dict.

    NOTE(review): reconstructed from a diff view — the parameter list past
    ``batch_size`` is inferred from the hunk header and body usage; confirm
    names/order against the callers before merging.

    Args:
        eval_request: the queued request; ``model``, ``precision`` and ``revision``
            are read here to build the endpoint name and stamp the results config.
        task_names: comma-separated lighteval task string passed through as ``tasks``.
        batch_size: forwarded as ``override_batch_size``.
        local_dir: output directory for result files.
        accelerator / region / vendor / instance_size / instance_type: inference
            endpoint placement parameters forwarded verbatim to lighteval.
        limit: max samples per task — testing only, results are not real metrics.

    Returns:
        The results dict produced by ``lighteval.main_accelerate.main``, with
        ``config.model_dtype`` / ``model_name`` / ``model_sha`` overwritten from
        the request.

    Raises:
        Whatever ``main`` raised — the failure is re-raised after the endpoint
        is cleaned up, so callers still observe the error.
    """
    if limit:
        print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")

    args = {
        "endpoint_model_name": f"{eval_request.model}_{eval_request.precision}".lower(),
        "accelerator": accelerator,
        "vendor": vendor,
        "region": region,
        "instance_size": instance_size,
        "instance_type": instance_type,
        "max_samples": limit,
        "job_id": str(datetime.now()),
        "push_results_to_hub": True,
        "save_details": True,
        "push_details_to_hub": True,
        "public_run": False,
        "cache_dir": CACHE_PATH,
        "results_org": RESULTS_REPO,
        "output_dir": local_dir,
        "override_batch_size": batch_size,
        "custom_tasks": "custom_tasks.py",
        "tasks": task_names,
    }

    try:
        results = main(args)

        results["config"]["model_dtype"] = eval_request.precision
        results["config"]["model_name"] = eval_request.model
        results["config"]["model_sha"] = eval_request.revision

        dumped = json.dumps(results, indent=2)
        print(dumped)
    except Exception:
        # Eval failed: force-release the inference endpoint so it does not keep
        # running (and billing) after a crash.
        # BUG FIX: `args` is a dict, so the original `args.cache_dir` raised
        # AttributeError inside this handler and the cleanup never ran.
        env_config = EnvConfig(token=TOKEN, cache_dir=args["cache_dir"])

        model_config = create_model_config(args=args, accelerator=accelerator)
        model, _ = load_model(config=model_config, env_config=env_config)
        model.cleanup()
        # BUG FIX: the original fell through to `return results` with `results`
        # unbound, masking the real failure behind an UnboundLocalError.
        # Re-raise so the caller sees the actual eval error.
        raise

    return results
|