tags:
- merge
- mergekit
- lazymergekit
model-index:
- name: Halo-14B-sce
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: IFEval (0-Shot)
      type: HuggingFaceH4/ifeval
      args:
        num_few_shot: 0
    metrics:
    - type: inst_level_strict_acc and prompt_level_strict_acc
      value: 67.54
      name: strict accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: BBH (3-Shot)
      type: BBH
      args:
        num_few_shot: 3
    metrics:
    - type: acc_norm
      value: 55.27
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MATH Lvl 5 (4-Shot)
      type: hendrycks/competition_math
      args:
        num_few_shot: 4
    metrics:
    - type: exact_match
      value: 40.56
      name: exact match
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GPQA (0-shot)
      type: Idavidrein/gpqa
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 12.98
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MuSR (0-shot)
      type: TAUR-Lab/MuSR
      args:
        num_few_shot: 0
    metrics:
    - type: acc_norm
      value: 14.24
      name: acc_norm
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU-PRO (5-shot)
      type: TIGER-Lab/MMLU-Pro
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 48.63
      name: accuracy
    source:
      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Quazim0t0/Halo-14B-sce
      name: Open LLM Leaderboard
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)

Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/Quazim0t0__Halo-14B-sce-details)

| Metric             | Value |
|--------------------|------:|
| Avg.               | 39.87 |
| IFEval (0-Shot)    | 67.54 |
| BBH (3-Shot)       | 55.27 |
| MATH Lvl 5 (4-Shot)| 40.56 |
| GPQA (0-shot)      | 12.98 |
| MuSR (0-shot)      | 14.24 |
| MMLU-PRO (5-shot)  | 48.63 |