"""Run a LightEval evaluation of a Yourbench task through the ``lighteval`` CLI.

The script writes a small temporary Python module that defines the custom
task table, passes that module to ``lighteval`` via ``--custom-tasks``,
prints the elapsed wall-clock time and always removes the temporary module
afterwards.
"""

import datetime
import os
import subprocess
import tempfile
import time

from dotenv import load_dotenv

# Not referenced directly below; kept so the module is still imported here
# exactly as before (the generated task file imports it again on its own).
from lighteval_task.lighteval_task import create_yourbench_task  # noqa: F401

# Load environment variables
load_dotenv()

# Source code of the temporary custom-task module handed to lighteval.
TASK_FILE_CONTENT = """
from lighteval_task.lighteval_task import create_yourbench_task

# Create yourbench task
yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8275b94250bd", "single_shot_questions")

# Define TASKS_TABLE needed by lighteval
TASKS_TABLE = [yourbench]
"""

# Create a timestamped output directory name to avoid overwriting
# earlier results.
output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

# Create the temporary task file. mkstemp() creates the file atomically,
# unlike the deprecated, race-prone mktemp() which only returns a name.
temp_fd, temp_file_path = tempfile.mkstemp(suffix=".py")
try:
    with os.fdopen(temp_fd, "w", encoding="utf-8") as temp_file:
        temp_file.write(TASK_FILE_CONTENT)

    # LightEval command
    cmd_args = [
        "lighteval",
        "endpoint",
        "inference-providers",
        "model=Qwen/Qwen2.5-72B-Instruct,provider=novita",
        "custom|yourbench|0|0",
        "--custom-tasks",
        temp_file_path,
        "--max-samples", "10",
        "--output-dir", output_dir,
        "--save-details",
        "--no-push-to-hub",
    ]

    # Start timer
    start_time = time.time()

    # Run the command with the current process environment
    subprocess.run(cmd_args, env=os.environ)

    # Calculate and print execution time
    execution_time = time.time() - start_time
    print(f"\nTemps d'exécution : {execution_time:.2f} secondes")
    print(f"Résultats sauvegardés dans : {output_dir}")
finally:
    # Clean up the temporary task file even if the command could not be
    # started or the run was interrupted.
    os.unlink(temp_file_path)