meg-huggingface
		
	committed on
		
		
					Commit 
							
							·
						
						64c3915
	
1
								Parent(s):
							
							5ea4d55
								
Backend toxicity
Browse files- main_backend_toxicity.py +85 -0
    	
        main_backend_toxicity.py
    ADDED
    
    | @@ -0,0 +1,85 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import logging
         | 
| 2 | 
            +
            import pprint
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            from huggingface_hub import snapshot_download
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            logging.getLogger("openai").setLevel(logging.DEBUG)
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from src.backend.inference_endpoint import create_endpoint
         | 
| 9 | 
            +
            from src.backend.run_toxicity_eval import main
         | 
| 10 | 
            +
            from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
         | 
| 11 | 
            +
            from src.backend.sort_queue import sort_models_by_priority
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
         | 
| 14 | 
            +
                                  EVAL_RESULTS_PATH_BACKEND, API, TOKEN)
         | 
| 15 | 
            +
            #, LIMIT, ACCELERATOR, VENDOR, REGION
         | 
| 16 | 
            +
            from src.logging import setup_logger
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            # Module-wide logger, configured by the project's logging helper.
            logger = setup_logger(__name__)

            # logging.basicConfig(level=logging.ERROR)

            # Pretty-printer used to format an eval request before logging it.
            pp = pprint.PrettyPrinter(width=80)

            # Status labels handed to the request-management helpers.
            PENDING_STATUS = "PENDING"
            RUNNING_STATUS = "RUNNING"
            FINISHED_STATUS = "FINISHED"
            FAILED_STATUS = "FAILED"

            # Download local copies of the results and request-queue datasets.
            # These calls execute whenever this module is imported or run.
            snapshot_download(
                repo_id=RESULTS_REPO,
                revision="main",
                local_dir=EVAL_RESULTS_PATH_BACKEND,
                repo_type="dataset",
                max_workers=60,
                token=TOKEN,
            )
            snapshot_download(
                repo_id=QUEUE_REPO,
                revision="main",
                local_dir=EVAL_REQUESTS_PATH_BACKEND,
                repo_type="dataset",
                max_workers=60,
                token=TOKEN,
            )
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            def run_auto_eval():
                """Evaluate the highest-priority pending request for toxicity.

                Steps, in order:

                1. Call ``check_completed_evals`` with the RUNNING / FINISHED /
                   FAILED labels so it can update requests that were previously
                   started (its exact behaviour lives in
                   ``src.backend.manage_requests``).
                2. Fetch the PENDING requests and order them with
                   ``sort_models_by_priority``.
                3. If there is at least one, mark the first as RUNNING, create an
                   inference endpoint for its model and run the toxicity
                   evaluation (``main``) against that endpoint.

                Returns:
                    None. Returns right after logging the count when no request
                    is pending.

                Raises:
                    Whatever the project helpers raise; nothing is caught here.
                """
                # To run evals in other states, change this list.
                current_pending_status = [PENDING_STATUS]

                # Let the helper reconcile requests currently marked RUNNING.
                check_completed_evals(
                    api=API,
                    checked_status=RUNNING_STATUS,
                    completed_status=FINISHED_STATUS,
                    failed_status=FAILED_STATUS,
                    hf_repo=QUEUE_REPO,
                    local_dir=EVAL_REQUESTS_PATH_BACKEND,
                    hf_repo_results=RESULTS_REPO,
                    local_dir_results=EVAL_RESULTS_PATH_BACKEND,
                )

                # Get all eval requests that are PENDING.
                eval_requests = get_eval_requests(
                    job_status=current_pending_status,
                    hf_repo=QUEUE_REPO,
                    local_dir=EVAL_REQUESTS_PATH_BACKEND,
                )
                # Sort the evals by priority (first submitted, first run).
                eval_requests = sort_models_by_priority(api=API, models=eval_requests)

                logger.info(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")

                if not eval_requests:
                    return

                # Only the first request is processed per invocation.
                eval_request = eval_requests[0]
                logger.info(pp.pformat(eval_request))

                set_eval_request(
                    api=API,
                    eval_request=eval_request,
                    set_to_status=RUNNING_STATUS,
                    hf_repo=QUEUE_REPO,
                    local_dir=EVAL_REQUESTS_PATH_BACKEND,
                )

                logger.info(f'Starting Evaluation of {eval_request.json_filepath} on Inference endpoints')

                # NOTE(review): if endpoint creation or the evaluation raises, this
                # function leaves the request marked RUNNING; presumably a later
                # check_completed_evals pass resolves it -- confirm.
                model_repository = eval_request.model
                # Endpoint name: lower-cased repo id with "/" replaced by "-",
                # plus a fixed suffix, so the same model always maps to the same name.
                endpoint_name = model_repository.lower().replace("/", "-") + "-toxicity-eval"
                endpoint_url = create_endpoint(endpoint_name, model_repository)
                logger.info("Created an endpoint url at %s", endpoint_url)

                # The value returned by the evaluation is not used here.
                main(endpoint_url, model_repository)
                logger.debug("FINISHED!")
         | 
| 82 | 
            +
             | 
| 83 | 
            +
             | 
| 84 | 
            +
            # Script entry point: process at most one pending eval request per run.
            if __name__ == "__main__":
                run_auto_eval()
         | 
