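"""Backend scheduler for the evaluation queue.

Mirrors the request-queue and results datasets from the Hugging Face Hub,
moves completed runs out of the RUNNING state, then picks the highest-priority
PENDING request and evaluates it with the harness runner (run_eval_suite_harness).
"""
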
import logging
import pprint

from huggingface_hub import snapshot_download

from src.backend.manage_requests import (
    FAILED_STATUS,
    FINISHED_STATUS,
    PENDING_STATUS,
    RUNNING_STATUS,
    check_completed_evals,
    get_eval_requests,
    set_eval_request,
)
from src.backend.run_eval_suite_harness import run_evaluation
from src.backend.sort_queue import sort_models_by_priority
from src.envs import (
    API,
    DEVICE,
    EVAL_REQUESTS_PATH_BACKEND,
    EVAL_RESULTS_PATH_BACKEND,
    LIMIT,
    NUM_FEWSHOT,
    QUEUE_REPO,
    RESULTS_REPO,
    TASKS_HARNESS,
    TOKEN,
)
from src.logging import setup_logger


logging.getLogger("openai").setLevel(logging.WARNING)

# logging.basicConfig(level=logging.ERROR)
logger = setup_logger(__name__)
pp = pprint.PrettyPrinter(width=80)


# Mirror the results and request-queue datasets locally before doing any scheduling
snapshot_download(
    repo_id=RESULTS_REPO,
    revision="main",
    local_dir=EVAL_RESULTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
    token=TOKEN,
)
snapshot_download(
    repo_id=QUEUE_REPO,
    revision="main",
    local_dir=EVAL_REQUESTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
    token=TOKEN,
)


def run_auto_eval():
    current_pending_status = [PENDING_STATUS]

    # Pull the eval request queue from the hub, then move evals currently marked
    # RUNNING to FINISHED (or FAILED) depending on whether they produced results
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
    )

    # Get all eval requests that are PENDING; change current_pending_status above to run evals in other states
    eval_requests = get_eval_requests(
        job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND
    )
    # Sort the evals by priority (first submitted, first run)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    logger.info(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")

    if len(eval_requests) == 0:
        return

    # Only one request is processed per invocation: take the highest-priority one
    eval_request = eval_requests[0]
    logger.info(pp.pformat(eval_request))

    # Mark the request as RUNNING on the hub before launching the run
    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )

    # Evaluate the selected request with the harness runner and store the results
    run_evaluation(
        eval_request=eval_request,
        task_names=TASKS_HARNESS,
        num_fewshot=NUM_FEWSHOT,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        results_repo=RESULTS_REPO,
        batch_size="auto",
        device=DEVICE,
        limit=LIMIT,
    )


if __name__ == "__main__":
    run_auto_eval()