File size: 3,634 Bytes
2dabcb6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
---
base_model:
- huihui-ai/Qwen2.5-14B-Instruct-1M-abliterated
---
vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated,add_bos_token=true,max_model_len=2048,tensor_parallel_size=2,dtype=bfloat16), gen_kwargs: (None), limit: 250.0, num_fewshot: 5, batch_size: auto

|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value|   |Stderr|
|-----|------:|----------------|-----:|-----------|---|----:|---|-----:|
|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.868|±  |0.0215|
|     |       |strict-match    |     5|exact_match|↑  |0.872|±  |0.0212|

vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated,add_bos_token=true,max_model_len=2048,tensor_parallel_size=2,dtype=bfloat16), gen_kwargs: (None), limit: 500.0, num_fewshot: 5, batch_size: auto

|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value|   |Stderr|
|-----|------:|----------------|-----:|-----------|---|----:|---|-----:|
|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.872|±  |0.0150|
|     |       |strict-match    |     5|exact_match|↑  |0.870|±  |0.0151|

vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated,add_bos_token=true,max_model_len=700,tensor_parallel_size=2,dtype=bfloat16,enforce_eager=True), gen_kwargs: (None), limit: 7.0, num_fewshot: None, batch_size: 1

|      Groups      |Version|Filter|n-shot|Metric|   |Value |   |Stderr|
|------------------|------:|------|------|------|---|-----:|---|-----:|
|mmlu              |      2|none  |      |acc   |↑  |0.7769|±  |0.0202|
| - humanities     |      2|none  |      |acc   |↑  |0.7692|±  |0.0440|
| - other          |      2|none  |      |acc   |↑  |0.7582|±  |0.0406|
| - social sciences|      2|none  |      |acc   |↑  |0.8452|±  |0.0376|
| - stem           |      2|none  |      |acc   |↑  |0.7519|±  |0.0376|


vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated-87,add_bos_token=true,max_model_len=2048,tensor_parallel_size=2,dtype=bfloat16), gen_kwargs: (None), limit: 250.0, num_fewshot: 5, batch_size: auto

|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value|   |Stderr|
|-----|------:|----------------|-----:|-----------|---|----:|---|-----:|
|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.864|±  |0.0217|
|     |       |strict-match    |     5|exact_match|↑  |0.864|±  |0.0217|

vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated-87,add_bos_token=true,max_model_len=2048,tensor_parallel_size=2,dtype=bfloat16), gen_kwargs: (None), limit: 500.0, num_fewshot: 5, batch_size: auto

|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value|   |Stderr|
|-----|------:|----------------|-----:|-----------|---|----:|---|-----:|
|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.882|±  |0.0144|
|     |       |strict-match    |     5|exact_match|↑  |0.874|±  |0.0149|

vllm (pretrained=/root/autodl-tmp/Qwen2.5-14B-Instruct-1M-abliterated-87,add_bos_token=true,max_model_len=700,tensor_parallel_size=2,dtype=bfloat16,enforce_eager=True), gen_kwargs: (None), limit: 7.0, num_fewshot: None, batch_size: 1

|      Groups      |Version|Filter|n-shot|Metric|   |Value |   |Stderr|
|------------------|------:|------|------|------|---|-----:|---|-----:|
|mmlu              |      2|none  |      |acc   |↑  |0.7769|±  |0.0201|
| - humanities     |      2|none  |      |acc   |↑  |0.7692|±  |0.0440|
| - other          |      2|none  |      |acc   |↑  |0.7692|±  |0.0391|
| - social sciences|      2|none  |      |acc   |↑  |0.8333|±  |0.0370|
| - stem           |      2|none  |      |acc   |↑  |0.7519|±  |0.0381|