Spaces:

yourbench
/

demo

Running on CPU Upgrade

demo / backend /tests /run_lighteval.py

block >1mo files | translate comments in english

d6f0b38 5 months ago

1.48 kB

	import os
	import tempfile
	import subprocess
	from dotenv import load_dotenv
	import time
	from lighteval_task.lighteval_task import create_yourbench_task
	import datetime

	# Standalone script: runs a lighteval evaluation of one yourbench dataset
	# against an inference-provider endpoint and prints where the results are.
	#
	# Steps:
	#   1. Load variables from a .env file into os.environ.
	#   2. Write a throwaway custom-task module that lighteval imports.
	#   3. Invoke the `lighteval` CLI as a subprocess and time it.
	#   4. Always delete the throwaway module, even if the run fails.

	# Load environment variables
	load_dotenv()

	# Source of the custom-task module handed to lighteval. It is assembled
	# line by line so the generated file always starts at column 0, whatever
	# the indentation of this script is.
	task_source = "\n".join([
	    "",
	    "from lighteval_task.lighteval_task import create_yourbench_task",
	    "",
	    "# Create yourbench task",
	    'yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8275b94250bd", "single_shot_questions")',
	    "",
	    "# Define TASKS_TABLE needed by lighteval",
	    "TASKS_TABLE = [yourbench]",
	    "",
	])

	# Create temporary task file. NamedTemporaryFile creates the file atomically
	# (tempfile.mktemp is deprecated and race-prone); delete=False keeps it on
	# disk after the handle is closed so the subprocess can read it.
	with tempfile.NamedTemporaryFile(
	    mode="w", suffix=".py", delete=False, encoding="utf-8"
	) as temp_file:
	    temp_file.write(task_source)
	    temp_file_path = temp_file.name

	try:
	    # Create an output folder with timestamp to avoid overwriting previous results
	    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
	    output_dir = f"data/lighteval_results_strict_{timestamp}"

	    # LightEval command
	    cmd_args = [
	        "lighteval",
	        "endpoint",
	        "inference-providers",
	        "model=Qwen/Qwen2.5-72B-Instruct,provider=novita",
	        # Task spec is pipe-separated; the pipes must not be backslash-escaped.
	        "custom|yourbench|0|0",
	        "--custom-tasks",
	        temp_file_path,
	        "--max-samples", "10",
	        "--output-dir", output_dir,
	        "--save-details",
	        "--no-push-to-hub",
	    ]

	    # Start timer (perf_counter is monotonic, so wall-clock adjustments
	    # cannot skew the measured duration)
	    start_time = time.perf_counter()

	    # Run the command with environment variables
	    completed = subprocess.run(cmd_args, env=os.environ)

	    # Calculate and print execution time
	    execution_time = time.perf_counter() - start_time
	    print(f"\nExecution time: {execution_time:.2f} seconds")
	    print(f"Results saved in: {output_dir}")

	    # Surface a failed run instead of silently ignoring the exit status
	    if completed.returncode != 0:
	        print(f"lighteval exited with status {completed.returncode}")
	finally:
	    # Clean up the temporary task file even if the run raised
	    os.unlink(temp_file_path)