Upload ./Qwen2-7B-Q2_K.mmlu.pro.txt with huggingface_hub
Browse files- Qwen2-7B-Q2_K.mmlu.pro.txt +2 -80
Qwen2-7B-Q2_K.mmlu.pro.txt
CHANGED
@@ -1,80 +1,2 @@
|
|
1 |
-
multiple_choice_score: there are
|
2 |
-
multiple_choice_score: reading
|
3 |
-
multiple_choice_score: preparing task data......................................................................done
|
4 |
-
multiple_choice_score : calculating TruthfulQA score over 70 tasks.
|
5 |
-
|
6 |
-
task acc_norm
|
7 |
-
1 0.00000000
|
8 |
-
2 0.00000000
|
9 |
-
3 0.00000000
|
10 |
-
4 0.00000000
|
11 |
-
5 0.00000000
|
12 |
-
6 0.00000000
|
13 |
-
7 0.00000000
|
14 |
-
8 0.00000000
|
15 |
-
9 0.00000000
|
16 |
-
10 0.00000000
|
17 |
-
11 0.00000000
|
18 |
-
12 0.00000000
|
19 |
-
13 0.00000000
|
20 |
-
14 0.00000000
|
21 |
-
15 6.66666667
|
22 |
-
16 6.25000000
|
23 |
-
17 5.88235294
|
24 |
-
18 5.55555556
|
25 |
-
19 5.26315789
|
26 |
-
20 5.00000000
|
27 |
-
21 4.76190476
|
28 |
-
22 4.54545455
|
29 |
-
23 4.34782609
|
30 |
-
24 4.16666667
|
31 |
-
25 4.00000000
|
32 |
-
26 3.84615385
|
33 |
-
27 3.70370370
|
34 |
-
28 3.57142857
|
35 |
-
29 3.44827586
|
36 |
-
30 6.66666667
|
37 |
-
31 6.45161290
|
38 |
-
32 6.25000000
|
39 |
-
33 9.09090909
|
40 |
-
34 8.82352941
|
41 |
-
35 8.57142857
|
42 |
-
36 8.33333333
|
43 |
-
37 8.10810811
|
44 |
-
38 7.89473684
|
45 |
-
39 7.69230769
|
46 |
-
40 7.50000000
|
47 |
-
41 7.31707317
|
48 |
-
42 7.14285714
|
49 |
-
43 6.97674419
|
50 |
-
44 6.81818182
|
51 |
-
45 6.66666667
|
52 |
-
46 8.69565217
|
53 |
-
47 8.51063830
|
54 |
-
48 10.41666667
|
55 |
-
49 10.20408163
|
56 |
-
50 12.00000000
|
57 |
-
51 11.76470588
|
58 |
-
52 11.53846154
|
59 |
-
53 13.20754717
|
60 |
-
54 14.81481481
|
61 |
-
55 16.36363636
|
62 |
-
56 16.07142857
|
63 |
-
57 15.78947368
|
64 |
-
58 15.51724138
|
65 |
-
59 15.25423729
|
66 |
-
60 15.00000000
|
67 |
-
61 14.75409836
|
68 |
-
62 14.51612903
|
69 |
-
63 14.28571429
|
70 |
-
64 15.62500000
|
71 |
-
65 15.38461538
|
72 |
-
66 15.15151515
|
73 |
-
67 14.92537313
|
74 |
-
68 14.70588235
|
75 |
-
69 14.49275362
|
76 |
-
70 14.28571429
|
77 |
-
|
78 |
-
Final result: 14.2857 +/- 4.2126
|
79 |
-
Random chance: 10.0000 +/- 3.6116
|
80 |
-
|
|
|
1 |
+
multiple_choice_score: there are 12032 tasks in prompt
|
2 |
+
multiple_choice_score: reading tasksmultiple_choice_score: failed to read task 1 of 12032
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|