lewtun HF Staff commited on
Commit
b6155d5
·
1 Parent(s): 5396149
eval_results/152334H/miqu-1-70b-sf/main/arc/results_2024-03-02T21-57-55.211943.json DELETED
@@ -1,90 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": 1,
6
- "max_samples": null,
7
- "job_id": "",
8
- "start_time": 633653.372401504,
9
- "end_time": 644458.122759118,
10
- "total_evaluation_time_secondes": "10804.750357614015",
11
- "model_name": "152334H/miqu-1-70b-sf",
12
- "model_sha": "1dca4cce36f01f2104ee2e6b97bac6ff7bb300c1",
13
- "model_dtype": "torch.bfloat16",
14
- "model_size": "129.85 GB",
15
- "config": null
16
- },
17
- "results": {
18
- "lighteval|arc:challenge|25": {
19
- "acc": 0.7244027303754266,
20
- "acc_stderr": 0.013057169655761838,
21
- "acc_norm": 0.7542662116040956,
22
- "acc_norm_stderr": 0.012581033453730113
23
- }
24
- },
25
- "versions": {
26
- "lighteval|arc:challenge|25": 0
27
- },
28
- "config_tasks": {
29
- "lighteval|arc:challenge": {
30
- "name": "arc:challenge",
31
- "prompt_function": "arc",
32
- "hf_repo": "ai2_arc",
33
- "hf_subset": "ARC-Challenge",
34
- "metric": [
35
- "loglikelihood_acc",
36
- "loglikelihood_acc_norm_nospace"
37
- ],
38
- "hf_avail_splits": [
39
- "train",
40
- "test"
41
- ],
42
- "evaluation_splits": [
43
- "test"
44
- ],
45
- "few_shots_split": null,
46
- "few_shots_select": "random_sampling_from_train",
47
- "generation_size": 1,
48
- "stop_sequence": [
49
- "\n"
50
- ],
51
- "output_regex": null,
52
- "frozen": false,
53
- "suite": [
54
- "lighteval",
55
- "arc"
56
- ],
57
- "original_num_docs": 1172,
58
- "effective_num_docs": 1172
59
- }
60
- },
61
- "summary_tasks": {
62
- "lighteval|arc:challenge|25": {
63
- "hashes": {
64
- "hash_examples": "17b0cae357c0259e",
65
- "hash_full_prompts": "d44b17600302e2b8",
66
- "hash_input_tokens": "b4a0c028b5247793",
67
- "hash_cont_tokens": "e8abf848493b50f7"
68
- },
69
- "truncated": 0,
70
- "non_truncated": 1172,
71
- "padded": 4661,
72
- "non_padded": 26,
73
- "effective_few_shots": 25.0,
74
- "num_truncated_few_shots": 0
75
- }
76
- },
77
- "summary_general": {
78
- "hashes": {
79
- "hash_examples": "aaa6929c6d3771fb",
80
- "hash_full_prompts": "b29ea0f4ca87b218",
81
- "hash_input_tokens": "57009ae3c9858632",
82
- "hash_cont_tokens": "ba41a41319a6da52"
83
- },
84
- "truncated": 0,
85
- "non_truncated": 1172,
86
- "padded": 4661,
87
- "non_padded": 26,
88
- "num_truncated_few_shots": 0
89
- }
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results/152334H/miqu-1-70b-sf/main/truthfulqa/results_2024-03-03T00-56-46.153531.json DELETED
@@ -1,87 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": 1,
6
- "max_samples": null,
7
- "job_id": "",
8
- "start_time": 3724720.693291109,
9
- "end_time": 3746239.242535963,
10
- "total_evaluation_time_secondes": "21518.549244854134",
11
- "model_name": "152334H/miqu-1-70b-sf",
12
- "model_sha": "1dca4cce36f01f2104ee2e6b97bac6ff7bb300c1",
13
- "model_dtype": "torch.bfloat16",
14
- "model_size": "129.85 GB",
15
- "config": null
16
- },
17
- "results": {
18
- "lighteval|truthfulqa:mc|0": {
19
- "truthfulqa_mc1": 0.5495716034271726,
20
- "truthfulqa_mc1_stderr": 0.017417264371967642,
21
- "truthfulqa_mc2": 0.7161892063222531,
22
- "truthfulqa_mc2_stderr": 0.015092456307260364
23
- }
24
- },
25
- "versions": {
26
- "lighteval|truthfulqa:mc|0": 0
27
- },
28
- "config_tasks": {
29
- "lighteval|truthfulqa:mc": {
30
- "name": "truthfulqa:mc",
31
- "prompt_function": "truthful_qa_multiple_choice",
32
- "hf_repo": "truthful_qa",
33
- "hf_subset": "multiple_choice",
34
- "metric": [
35
- "truthfulqa_mc_metrics"
36
- ],
37
- "hf_avail_splits": [
38
- "validation"
39
- ],
40
- "evaluation_splits": [
41
- "validation"
42
- ],
43
- "few_shots_split": null,
44
- "few_shots_select": null,
45
- "generation_size": -1,
46
- "stop_sequence": [
47
- "\n"
48
- ],
49
- "output_regex": null,
50
- "frozen": false,
51
- "suite": [
52
- "lighteval"
53
- ],
54
- "original_num_docs": 817,
55
- "effective_num_docs": 817
56
- }
57
- },
58
- "summary_tasks": {
59
- "lighteval|truthfulqa:mc|0": {
60
- "hashes": {
61
- "hash_examples": "36a6d90e75d92d4a",
62
- "hash_full_prompts": "bc19700dcc192702",
63
- "hash_input_tokens": "691231bfc79d7533",
64
- "hash_cont_tokens": "f5da56a132aab151"
65
- },
66
- "truncated": 0,
67
- "non_truncated": 817,
68
- "padded": 9623,
69
- "non_padded": 373,
70
- "effective_few_shots": 0.0,
71
- "num_truncated_few_shots": 0
72
- }
73
- },
74
- "summary_general": {
75
- "hashes": {
76
- "hash_examples": "aed1dfc67e53d0f2",
77
- "hash_full_prompts": "046a77ce3ce1d5d3",
78
- "hash_input_tokens": "19c9780a6766a7b6",
79
- "hash_cont_tokens": "52845ca5a27c2b40"
80
- },
81
- "truncated": 0,
82
- "non_truncated": 817,
83
- "padded": 9623,
84
- "non_padded": 373,
85
- "num_truncated_few_shots": 0
86
- }
87
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results/152334H/miqu-1-70b-sf/main/winogrande/results_2024-03-02T20-34-53.564701.json DELETED
@@ -1,87 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": 1,
6
- "max_samples": null,
7
- "job_id": "",
8
- "start_time": 3709420.364256513,
9
- "end_time": 3715228.232641632,
10
- "total_evaluation_time_secondes": "5807.868385119364",
11
- "model_name": "152334H/miqu-1-70b-sf",
12
- "model_sha": "1dca4cce36f01f2104ee2e6b97bac6ff7bb300c1",
13
- "model_dtype": "torch.bfloat16",
14
- "model_size": "129.85 GB",
15
- "config": null
16
- },
17
- "results": {
18
- "lighteval|winogrande|5": {
19
- "acc": 0.7955801104972375,
20
- "acc_stderr": 0.011334090612597212
21
- }
22
- },
23
- "versions": {
24
- "lighteval|winogrande|5": 0
25
- },
26
- "config_tasks": {
27
- "lighteval|winogrande": {
28
- "name": "winogrande",
29
- "prompt_function": "winogrande",
30
- "hf_repo": "winogrande",
31
- "hf_subset": "winogrande_xl",
32
- "metric": [
33
- "loglikelihood_acc"
34
- ],
35
- "hf_avail_splits": [
36
- "train",
37
- "test",
38
- "validation"
39
- ],
40
- "evaluation_splits": [
41
- "validation"
42
- ],
43
- "few_shots_split": null,
44
- "few_shots_select": "random_sampling",
45
- "generation_size": -1,
46
- "stop_sequence": [
47
- "\n"
48
- ],
49
- "output_regex": null,
50
- "frozen": false,
51
- "suite": [
52
- "lighteval"
53
- ],
54
- "original_num_docs": 1267,
55
- "effective_num_docs": 1267
56
- }
57
- },
58
- "summary_tasks": {
59
- "lighteval|winogrande|5": {
60
- "hashes": {
61
- "hash_examples": "087d5d1a1afd4c7b",
62
- "hash_full_prompts": "21d1cf75825bfc31",
63
- "hash_input_tokens": "1a0da9cddc6ebfb9",
64
- "hash_cont_tokens": "3d7ba882ca59844b"
65
- },
66
- "truncated": 0,
67
- "non_truncated": 1267,
68
- "padded": 2413,
69
- "non_padded": 121,
70
- "effective_few_shots": 5.0,
71
- "num_truncated_few_shots": 0
72
- }
73
- },
74
- "summary_general": {
75
- "hashes": {
76
- "hash_examples": "b9a49975cc41fab7",
77
- "hash_full_prompts": "9056d6e5dbbb5d94",
78
- "hash_input_tokens": "eb15adadcb39e2da",
79
- "hash_cont_tokens": "0a779f58fe02fb16"
80
- },
81
- "truncated": 0,
82
- "non_truncated": 1267,
83
- "padded": 2413,
84
- "non_padded": 121,
85
- "num_truncated_few_shots": 0
86
- }
87
- }