abhishek commited on
Commit
fc3d7cc
·
verified ·
1 Parent(s): 6be9a3d

Upload eval_results/abhishek/autotrain-mixtral-8x7b-orpo-v1/main/truthfulqa/results_2024-05-01T17-42-45.527465.json with huggingface_hub

Browse files
eval_results/abhishek/autotrain-mixtral-8x7b-orpo-v1/main/truthfulqa/results_2024-05-01T17-42-45.527465.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": 4,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 281147.185656574,
9
+ "end_time": 283545.525195964,
10
+ "total_evaluation_time_secondes": "2398.339539389999",
11
+ "model_name": "abhishek/autotrain-mixtral-8x7b-orpo-v1",
12
+ "model_sha": "a8be37cf01ad767a0c71e0ba3af29c0b3ebcb559",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "87.49 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "leaderboard|truthfulqa:mc|0": {
19
+ "truthfulqa_mc1": 0.44430844553243576,
20
+ "truthfulqa_mc1_stderr": 0.017394586250743173,
21
+ "truthfulqa_mc2": 0.6220840546397899,
22
+ "truthfulqa_mc2_stderr": 0.014894972484841071
23
+ },
24
+ "all": {
25
+ "truthfulqa_mc1": 0.44430844553243576,
26
+ "truthfulqa_mc1_stderr": 0.017394586250743173,
27
+ "truthfulqa_mc2": 0.6220840546397899,
28
+ "truthfulqa_mc2_stderr": 0.014894972484841071
29
+ }
30
+ },
31
+ "versions": {
32
+ "leaderboard|truthfulqa:mc|0": 0
33
+ },
34
+ "config_tasks": {
35
+ "leaderboard|truthfulqa:mc": {
36
+ "name": "truthfulqa:mc",
37
+ "prompt_function": "truthful_qa_multiple_choice",
38
+ "hf_repo": "truthful_qa",
39
+ "hf_subset": "multiple_choice",
40
+ "metric": [
41
+ "truthfulqa_mc_metrics"
42
+ ],
43
+ "hf_avail_splits": [
44
+ "validation"
45
+ ],
46
+ "evaluation_splits": [
47
+ "validation"
48
+ ],
49
+ "few_shots_split": null,
50
+ "few_shots_select": null,
51
+ "generation_size": -1,
52
+ "stop_sequence": [
53
+ "\n"
54
+ ],
55
+ "output_regex": null,
56
+ "frozen": false,
57
+ "suite": [
58
+ "leaderboard"
59
+ ],
60
+ "original_num_docs": 817,
61
+ "effective_num_docs": 817,
62
+ "trust_dataset": true,
63
+ "must_remove_duplicate_docs": null
64
+ }
65
+ },
66
+ "summary_tasks": {
67
+ "leaderboard|truthfulqa:mc|0": {
68
+ "hashes": {
69
+ "hash_examples": "36a6d90e75d92d4a",
70
+ "hash_full_prompts": "17e9d0dc9f923ba3",
71
+ "hash_input_tokens": "3f8449843b7ddae4",
72
+ "hash_cont_tokens": "091a93bdab119aaa"
73
+ },
74
+ "truncated": 0,
75
+ "non_truncated": 817,
76
+ "padded": 9429,
77
+ "non_padded": 567,
78
+ "effective_few_shots": 0.0,
79
+ "num_truncated_few_shots": 0
80
+ }
81
+ },
82
+ "summary_general": {
83
+ "hashes": {
84
+ "hash_examples": "aed1dfc67e53d0f2",
85
+ "hash_full_prompts": "81a2e5a97bc8b7e3",
86
+ "hash_input_tokens": "b64ab790784d7eaa",
87
+ "hash_cont_tokens": "11d57456645f88a9"
88
+ },
89
+ "truncated": 0,
90
+ "non_truncated": 817,
91
+ "padded": 9429,
92
+ "non_padded": 567,
93
+ "num_truncated_few_shots": 0
94
+ }
95
+ }