# Hugging Face file-viewer residue (not part of the module itself):
# uploader yujinyujin9393 · commit bb8ff6c "Upload 7 files" (verified) · 2.23 kB
# Raw-content URL of README.md on the main branch of open-compass/VLMEvalKit.
VLMEVALKIT_README = (
    "https://raw.githubusercontent.com/"
    "open-compass/VLMEvalKit/main/README.md"
)
# CONSTANTS-CITATION
# BibTeX entry offered to users for citing the leaderboard results. It is
# assembled one BibTeX line per source line; each piece ends with an explicit
# newline so the final text is exactly the six-line entry plus a trailing "\n".
CITATION_BUTTON_TEXT = (
    "@article{guo2025sok,\n"
    "title={{Frontier AI's Impact on the Cybersecurity Landscape}},\n"
    "author={Guo, Wenbo and Potter, Yujin and Shi, Tianneng and Wang, Zhun and Zhang, Andy and Song, Dawn},\n"
    "journal={arXiv preprint arXiv:2504.05408},\n"
    "year={2025}\n"
    "}\n"
)
# Caption that accompanies the citation snippet.
CITATION_BUTTON_LABEL = 'Copy the following snippet to cite these results'
# CONSTANTS-TEXT
# Markdown introduction for the leaderboard page. The template carries three
# positional `{}` placeholders, in this order: number of benchmarks, number of
# cybersecurity aspects, and the time of the last update.
# NOTE(review): presumably filled in with str.format by the app — confirm at
# the call site.
LEADERBORAD_INTRODUCTION = """# Cybersecurity Leaderboard
### Welcome to the Cybersecurity Leaderboard! This leaderboard is a collection of benchmarks relevant to cybersecurity capabilities.
This leaderboard covers {} benchmarks across {} aspects of cybersecurity work.
This leaderboard was last updated: {} """
# The name above is misspelled ("LEADERBORAD"); it is kept unchanged because
# other modules may already import it. New code should prefer this correctly
# spelled alias, which refers to the very same string.
LEADERBOARD_INTRODUCTION = LEADERBORAD_INTRODUCTION
# CONSTANTS-FIELDS
# Currently disabled:
# META_FIELDS = ['Model']

# Names of the eight cybersecurity task categories. The same strings are used
# as keys of LEADERBOARD_MD, so the spelling must stay in sync.
# NOTE(review): the name suggests these are the tasks selected by default —
# confirm against the app code.
DEFAULT_TASK = [
    "Vulnerable code generation",
    "Attack generation",
    "CTF",
    "Cyber knowledge",
    "Pen test",
    "Vulnerability detection",
    "PoC generation",
    "Patching",
]

# MMBench / CCBench dataset identifiers.
# NOTE(review): nothing else in this section refers to them; this looks like a
# leftover from the VLMEvalKit leaderboard template — confirm before removing.
MMBENCH_FIELDS = [
    "MMBench_TEST_EN_V11",
    "MMBench_TEST_CN_V11",
    "MMBench_TEST_EN",
    "MMBench_TEST_CN",
    "CCBench",
]
# Markdown description for each leaderboard section. 'MAIN' holds the blurb for
# the overall results; it contains two positional `{}` placeholders (the number
# of benchmarks, once for Avg Score and once for Avg Rank). Every other key is
# a task name spelled exactly as in DEFAULT_TASK.
LEADERBOARD_MD = {
    'MAIN': (
        "\n"
        "## Main Evaluation Results\n"
        "- Metrics:\n"
        "- Avg Score: The average score on {} Cybersecurity Benchmarks (normalized to 0 - 100, the higher the better).\n"
        "- Avg Rank: The average rank on {} Cybersecurity Benchmarks (the lower the better).\n"
        "- Avg Score & Rank are calculated based on selected benchmark. **When results for some selected benchmarks are missing, Avg Score / Rank will be None!!!**\n"
    ),
}
# The per-task descriptions have not been written yet, so every task maps to
# the same placeholder text. dict.fromkeys keeps the tuple's order, which
# preserves the original insertion order of the keys.
LEADERBOARD_MD.update(dict.fromkeys(
    (
        'Vulnerable code generation',
        'Attack generation',
        'CTF',
        'Cyber knowledge',
        'Pen test',
        'Vulnerability detection',
        'PoC generation',
        'Patching',
    ),
    "Need to add a description\n",
))