Spaces:

FrontierAICybersecurity
/

Cybersecurity_leaderboard

Running

App Files Files Community

Cybersecurity_leaderboard / meta_data.py

yujinyujin9393

Upload 7 files

bb8ff6c verified about 2 months ago

raw

history blame

2.23 kB

	# URL of the VLMEvalKit README on GitHub.
	# NOTE(review): not referenced in the visible part of this file; it may be a
	# leftover from the project this module was adapted from - confirm before removing.
	VLMEVALKIT_README = 'https://raw.githubusercontent.com/open-compass/VLMEvalKit/main/README.md'
	# CONSTANTS-CITATION
	# BibTeX entry for citing the leaderboard. It is a raw string, so braces and
	# backslashes are kept literally.
	CITATION_BUTTON_TEXT = r"""@article{guo2025sok,
	title={{Frontier AI's Impact on the Cybersecurity Landscape}},
	author={Guo, Wenbo and Potter, Yujin and Shi, Tianneng and Wang, Zhun and Zhang, Andy and Song, Dawn},
	journal={arXiv preprint arXiv:2504.05408},
	year={2025}
	}
	"""
	# Label text intended to accompany the citation snippet above.
	CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
	# CONSTANTS-TEXT
	# Markdown introduction for the leaderboard. It contains three positional `{}`
	# placeholders; judging by the surrounding text they stand for the number of
	# benchmarks, the number of cybersecurity aspects and the last-update time.
	# Presumably the caller fills them with str.format - verify against the caller.
	LEADERBOARD_INTRODUCTION = """# Cybersecurity Leaderboard
	### Welcome to the Cybersecurity Leaderboard! This leaderboard is a collection of benchmarks relevant to cybersecurity capabilities.
	This leaderboard covers {} benchmarks across {} aspects of cybersecurity work.

	This leaderboard was last updated: {} """
	# Historical, misspelled name ("LEADERBORAD"). Kept as an alias so code that
	# already imports it keeps working; new code should use LEADERBOARD_INTRODUCTION.
	LEADERBORAD_INTRODUCTION = LEADERBOARD_INTRODUCTION
	# CONSTANTS-FIELDS
	# META_FIELDS = [
	# 'Model'
	# ]

	# Task categories covered by the leaderboard. Each name is also used as a key
	# of LEADERBOARD_MD further down in this file.
	DEFAULT_TASK = [
		'Vulnerable code generation',
		'Attack generation',
		'CTF',
		'Cyber knowledge',
		'Pen test',
		'Vulnerability detection',
		'PoC generation',
		'Patching',
	]
	# MMBench split names.
	# NOTE(review): these do not match any cybersecurity task above and are not
	# used in the visible part of this file - possibly a leftover; confirm.
	MMBENCH_FIELDS = [
		'MMBench_TEST_EN_V11',
		'MMBench_TEST_CN_V11',
		'MMBench_TEST_EN',
		'MMBench_TEST_CN',
		'CCBench',
	]

	# Markdown text for each leaderboard section, keyed by section name: 'MAIN'
	# plus the eight task categories. The 'MAIN' text carries two positional `{}`
	# placeholders; the per-task texts are still placeholder descriptions.
	LEADERBOARD_MD = {
		'MAIN': """
	## Main Evaluation Results

	- Metrics:
	- Avg Score: The average score on {} Cybersecurity Benchmarks (normalized to 0 - 100, the higher the better).
	- Avg Rank: The average rank on {} Cybersecurity Benchmarks (the lower the better).
	- Avg Score & Rank are calculated based on selected benchmark. When results for some selected benchmarks are missing, Avg Score / Rank will be None!!!
	""",
		'Vulnerable code generation': """Need to add a description
	""",
		'Attack generation': """Need to add a description
	""",
		'CTF': """Need to add a description
	""",
		'Cyber knowledge': """Need to add a description
	""",
		'Pen test': """Need to add a description
	""",
		'Vulnerability detection': """Need to add a description
	""",
		'PoC generation': """Need to add a description
	""",
		'Patching': """Need to add a description
	""",
	}