"""
Handle submissions to the GuardBench leaderboard.
"""

import json
import os
import tempfile
import uuid
from datetime import datetime
from typing import Dict, List, Tuple

from huggingface_hub import HfApi
from datasets import load_dataset, Dataset

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, RESULTS_DATASET_ID, TOKEN
from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard, load_leaderboard_data


def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Validate a submission file.

    Returns a tuple of (is_valid, message).
    """
    try:
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return False, message

        return True, "Submission is valid"
    except Exception as e:
        return False, f"Error validating submission: {e}"


def submit_to_hub(file_path: str, metadata: Dict, dataset_id: str, token: str, version: str = "v0") -> Tuple[bool, str]:
    """
    Submit results to a HuggingFace dataset repository as individual files.

    Args:
        file_path: Path to the submission file
        metadata: Metadata to include with the submission
        dataset_id: The dataset repository ID
        token: HuggingFace API token
        version: The version of the benchmark used (e.g., "v0", "v1")

    Returns:
        A tuple of (success, message).
    """
    try:
        # Parse the submission file and abort if it contains no entries.
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return False, message

        # Build a unique submission ID from the model name and a timestamp.
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        submission_id = f"{model_name_safe}_{timestamp}"

        api = HfApi(token=token)

        # Write the enriched entries to a temporary JSONL file.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as temp_file:
            for entry in entries:
                # Ensure every entry records the model name.
                if "model_name" not in entry:
                    entry["model_name"] = metadata.get("model_name")

                # Copy any remaining metadata fields the entry does not already have.
                for key, value in metadata.items():
                    if key != "model_name" and key not in entry:
                        entry[key] = value

                # Tag the entry with the benchmark version.
                entry["version"] = version

                temp_file.write(json.dumps(entry) + "\n")

            temp_path = temp_file.name

        # v0 submissions live under "submissions/"; later versions are kept in
        # version-specific folders such as "submissions_v1/".
        submission_path = (
            f"submissions_{version}/{submission_id}_{version}.jsonl"
            if version != "v0"
            else f"submissions/{submission_id}.jsonl"
        )
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=submission_path,
            repo_id=dataset_id,
            repo_type="dataset",
            commit_message=f"Add submission for {model_name} (version {version})",
        )

        # Clean up the temporary file once the upload has completed.
        os.unlink(temp_path)

        return True, f"Successfully uploaded submission for {model_name} to {dataset_id} (version {version})"
    except Exception as e:
        return False, f"Error submitting to dataset: {e}"
def process_submission(file_path: str, metadata: Dict, version: str = "v0") -> str:
    """
    Process a submission to the GuardBench leaderboard.

    Args:
        file_path: Path to the submission file
        metadata: Metadata to include with the submission
        version: The version of the benchmark used (e.g., "v0", "v1")

    Returns:
        A styled status message describing the outcome.
    """
    # Validate the submission file before uploading anything.
    is_valid, validation_message = validate_submission(file_path)
    if not is_valid:
        return styled_error(validation_message)

    # Record the benchmark version in the submission metadata.
    metadata["version"] = version

    # Upload the submission to the results dataset on the Hugging Face Hub.
    success, message = submit_to_hub(file_path, metadata, RESULTS_DATASET_ID, TOKEN, version=version)
    if not success:
        return styled_error(message)

    return styled_message(f"Submission successful! {message}")
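

# A minimal usage sketch for local testing, not part of the leaderboard app.
# The file path and metadata below are hypothetical placeholders; the fields
# required in each JSONL entry are defined by process_jsonl_submission in
# src.leaderboard.processor.
if __name__ == "__main__":
    example_metadata = {"model_name": "example-org/example-guard-model"}
    print(process_submission("results/example_submission.jsonl", example_metadata, version="v0"))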