meg-huggingface committed on
Commit
611c544
·
1 Parent(s): 3441586

Refreshing less

Browse files
src/backend/run_eval_suite_harness.py CHANGED
@@ -35,11 +35,8 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
             "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
         )
 
-    try:
-        all_tasks = tasks.ALL_TASKS
-    except AttributeError:
-        task_manager = TaskManager()
-        all_tasks = task_manager.all_tasks
+    task_manager = TaskManager()
+    all_tasks = task_manager.all_tasks
 
     task_names = utils.pattern_match(task_names, all_tasks)
 
src/envs.py CHANGED
@@ -9,10 +9,10 @@ TOKEN = os.environ.get("TOKEN") # A read/write token for your org
 OWNER = "meg" # Change to your org - don't forget to create a results and request dataset
 
 # For harness evaluations
-DEVICE = "cpu" # "cuda:0" if you add compute, for harness evaluations
+DEVICE = "cuda:0" #if you add compute, for harness evaluations
 LIMIT = 20 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
-TASKS_HARNESS = ["realtoxicityprompts", "logiqa"]
+TASKS_HARNESS = ["realtoxicityprompts", "toxigen", "logiqa"]
 
 # For lighteval evaluations
 ACCELERATOR = "cpu"
@@ -35,7 +35,7 @@ EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
-REFRESH_RATE = 10 * 60 # 10 min
+REFRESH_RATE = 30 * 60 # 30 min
 NUM_LINES_VISUALIZE = 300
 
 API = HfApi(token=TOKEN)