File size: 1,516 Bytes
39acd70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import tempfile
import subprocess
from dotenv import load_dotenv
import time
from lighteval_task.lighteval_task import create_yourbench_task
import datetime

# Script body: generate a throwaway lighteval custom-task module, run the
# `lighteval` CLI against it, report the wall-clock duration, and always
# remove the generated module afterwards.

# Load environment variables from a .env file so the child process (which
# receives os.environ below) can see them.
load_dotenv()

# Create the temporary task file.
# NamedTemporaryFile(delete=False) creates the file atomically and hands back
# its name; tempfile.mktemp() only returns a name and is deprecated because
# another process can claim that path before we open it. delete=False keeps
# the file on disk after the handle is closed so the subprocess can read it.
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as temp_file:
    temp_file_path = temp_file.name
    temp_file.write("""
from lighteval_task.lighteval_task import create_yourbench_task

# Create yourbench task
yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8275b94250bd", "single_shot_questions")

# Define TASKS_TABLE needed by lighteval
TASKS_TABLE = [yourbench]
""")

try:
    # Create a timestamped output directory to avoid overwriting earlier results
    output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # LightEval command (argument list, so no shell is involved)
    cmd_args = [
        "lighteval",
        "endpoint",
        "inference-providers",
        "model=Qwen/Qwen2.5-72B-Instruct,provider=novita",
        "custom|yourbench|0|0",
        "--custom-tasks",
        temp_file_path,
        "--max-samples", "10",
        "--output-dir", output_dir,
        "--save-details",
        "--no-push-to-hub",
    ]

    # Start timer
    start_time = time.time()

    # Run the command with the current environment variables.
    # The exit status is intentionally not checked, matching the original
    # best-effort behavior.
    subprocess.run(cmd_args, env=os.environ)

    # Calculate and print execution time
    execution_time = time.time() - start_time
    print(f"\nTemps d'exécution : {execution_time:.2f} secondes")
    print(f"Résultats sauvegardés dans : {output_dir}")
finally:
    # Clean up the generated task module even if the run raised (for example
    # when the `lighteval` executable is missing) or was interrupted.
    os.unlink(temp_file_path)