# File size: 2,230 Bytes
# bb8ff6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Raw-Markdown URL of the VLMEvalKit README on GitHub.
VLMEVALKIT_README = (
    'https://raw.githubusercontent.com/open-compass/VLMEvalKit/main/README.md'
)
# CONSTANTS-CITATION
# BibTeX entry for citing the leaderboard results. Every BibTeX line is
# followed by one empty line, hence each fragment below ends in "\n\n".
CITATION_BUTTON_TEXT = (
    "@article{guo2025sok,\n\n"
    "  title={{Frontier AI's Impact on the Cybersecurity Landscape}},\n\n"
    "  author={Guo, Wenbo and Potter, Yujin and Shi, Tianneng and Wang, Zhun and Zhang, Andy and Song, Dawn},\n\n"
    "  journal={arXiv preprint arXiv:2504.05408},\n\n"
    "  year={2025}\n\n"
    "}\n\n"
)
# Text that accompanies the citation snippet (a button label, going by the name).
CITATION_BUTTON_LABEL = 'Copy the following snippet to cite these results'
# CONSTANTS-TEXT
# Markdown introduction for the leaderboard page. It contains three positional
# `{}` slots meant for str.format; going by the surrounding wording they are
# the number of benchmarks, the number of cybersecurity aspects, and the
# last-updated date, in that order.
#
# The text is spelled out with explicit "\n" and trailing spaces (after
# "capabilities." and after the final "{}") so that editors which strip
# trailing whitespace cannot silently alter the value.
LEADERBORAD_INTRODUCTION = (
    "# Cybersecurity Leaderboard\n"
    "\n"
    "### Welcome to the Cybersecurity Leaderboard! This leaderboard is a collection of benchmarks relevant to cybersecurity capabilities. \n"
    "\n"
    "This leaderboard covers {} benchmarks across {} aspects of cybersecurity work.\n"
    "\n"
    "\n"
    "\n"
    "This leaderboard was last updated: {} "
)
# Correctly spelled alias. The misspelled name above is kept unchanged because
# other modules may already import it.
LEADERBOARD_INTRODUCTION = LEADERBORAD_INTRODUCTION
# CONSTANTS-FIELDS
# META_FIELDS is currently commented out:
# META_FIELDS = [
#     'Model'
# ]

# Task names; the same strings are used as keys of LEADERBOARD_MD.
DEFAULT_TASK = [
    'Vulnerable code generation',
    'Attack generation',
    'CTF',
    'Cyber knowledge',
    'Pen test',
    'Vulnerability detection',
    'PoC generation',
    'Patching',
]
# MMBench / CCBench field names.
# NOTE(review): not referenced anywhere in the visible part of this file --
# confirm whether anything still imports it.
MMBENCH_FIELDS = [
    'MMBench_TEST_EN_V11',
    'MMBench_TEST_CN_V11',
    'MMBench_TEST_EN',
    'MMBench_TEST_CN',
    'CCBench',
]

# The README file for each benchmark
# Markdown text keyed by section name: 'MAIN' first, then one entry per task.
LEADERBOARD_MD = {
    # Contains two positional `{}` slots for str.format; the wording suggests
    # both take the number of benchmarks. Trailing spaces before some "\n"
    # are part of the text and are kept explicit here.
    'MAIN': (
        "\n"
        "\n"
        "## Main Evaluation Results\n"
        "\n"
        "\n"
        "\n"
        "- Metrics:\n"
        "\n"
        "  - Avg Score: The average score on {} Cybersecurity Benchmarks (normalized to 0 - 100, the higher the better). \n"
        "\n"
        "  - Avg Rank: The average rank on {} Cybersecurity Benchmarks (the lower the better). \n"
        "\n"
        "  - Avg Score & Rank are calculated based on selected benchmark. **When results for some selected benchmarks are missing, Avg Score / Rank will be None!!!** \n"
        "\n"
    ),
}

# Every task section currently holds the same placeholder text, so the entries
# are created in one go (insertion order matches the tuple order).
LEADERBOARD_MD.update(
    dict.fromkeys(
        (
            'Vulnerable code generation',
            'Attack generation',
            'CTF',
            'Cyber knowledge',
            'Pen test',
            'Vulnerability detection',
            'PoC generation',
            'Patching',
        ),
        "Need to add a description\n\n",
    )
)