meg-huggingface
committed on
Commit
·
30b5f7e
1
Parent(s):
611c544
Updates, not sure what -- left over from last night
Browse files
main_backend_harness.py
CHANGED
@@ -13,8 +13,6 @@ from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_
|
|
13 |
from src.envs import TASKS_HARNESS, NUM_FEWSHOT
|
14 |
from src.logging import setup_logger
|
15 |
|
16 |
-
|
17 |
-
|
18 |
# logging.basicConfig(level=logging.ERROR)
|
19 |
logger = setup_logger(__name__)
|
20 |
pp = pprint.PrettyPrinter(width=80)
|
|
|
13 |
from src.envs import TASKS_HARNESS, NUM_FEWSHOT
|
14 |
from src.logging import setup_logger
|
15 |
|
|
|
|
|
16 |
# logging.basicConfig(level=logging.ERROR)
|
17 |
logger = setup_logger(__name__)
|
18 |
pp = pprint.PrettyPrinter(width=80)
|
src/backend/run_eval_suite_harness.py
CHANGED
@@ -55,6 +55,8 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
|
|
55 |
write_out=True,
|
56 |
)
|
57 |
|
|
|
|
|
58 |
results["config"]["model_dtype"] = eval_request.precision
|
59 |
results["config"]["model_name"] = eval_request.model
|
60 |
results["config"]["model_sha"] = eval_request.revision
|
|
|
55 |
write_out=True,
|
56 |
)
|
57 |
|
58 |
+
# TODO: For Toxicity, subtract from 100.
|
59 |
+
# results["realtoxicityprompts"]["score"] = 100 - results["realtoxicityprompts"]["score"]
|
60 |
results["config"]["model_dtype"] = eval_request.precision
|
61 |
results["config"]["model_name"] = eval_request.model
|
62 |
results["config"]["model_sha"] = eval_request.revision
|