sileod committed on
Commit
975123f
·
verified ·
1 Parent(s): 1cb6cb8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +133 -103
README.md CHANGED
@@ -19,111 +19,141 @@ tags:
19
 
20
  ModernBERT multi-task fine-tuned on tasksource NLI tasks, including MNLI, ANLI, SICK, WANLI, doc-nli, LingNLI, FOLIO, FOL-NLI, LogicNLI, Label-NLI (and all datasets in the table below).
21
  This is the equivalent of an "instruct" version.
 
22
 
23
- Test accuracy at 100k training steps. A 215k-step version is coming December 24th.
24
 
25
- | test_name | test_accuracy |
26
- |:-------------------------------------|----------------:|
27
- | glue/mnli | 0.91 |
28
- | glue/qnli | 0.93 |
29
- | glue/rte | 0.86 |
30
- | super_glue/cb | 0.89 |
31
- | anli/a1 | 0.62 |
32
- | anli/a2 | 0.47 |
33
- | anli/a3 | 0.42 |
34
- | sick/label | 0.92 |
35
- | sick/entailment_AB | 0.84 |
36
- | snli | 0.91 |
37
- | scitail/snli_format | 0.95 |
38
- | hans | 1 |
39
- | WANLI | 0.71 |
40
- | recast/recast_sentiment | 0.98 |
41
- | recast/recast_verbcorner | 0.94 |
42
- | recast/recast_ner | 0.87 |
43
- | recast/recast_factuality | 0.93 |
44
- | recast/recast_puns | 0.93 |
45
- | recast/recast_kg_relations | 0.94 |
46
- | recast/recast_verbnet | 0.88 |
47
- | recast/recast_megaveridicality | 0.87 |
48
- | probability_words_nli/usnli | 0.77 |
49
- | probability_words_nli/reasoning_1hop | 0.99 |
50
- | probability_words_nli/reasoning_2hop | 0.9 |
51
- | nan-nli | 0.85 |
52
- | nli_fever | 0.72 |
53
- | breaking_nli | 1 |
54
- | conj_nli | 0.71 |
55
- | fracas | 0.86 |
56
- | dialogue_nli | 0.88 |
57
- | mpe | 0.73 |
58
- | dnc | 0.9 |
59
- | recast_white/fnplus | 0.81 |
60
- | recast_white/sprl | 0.92 |
61
- | recast_white/dpr | 0.61 |
62
- | robust_nli/IS_CS | 0.76 |
63
- | robust_nli/LI_LI | 0.98 |
64
- | robust_nli/ST_WO | 0.85 |
65
- | robust_nli/PI_SP | 0.74 |
66
- | robust_nli/PI_CD | 0.8 |
67
- | robust_nli/ST_SE | 0.78 |
68
- | robust_nli/ST_NE | 0.86 |
69
- | robust_nli/ST_LM | 0.81 |
70
- | robust_nli_is_sd | 1 |
71
- | robust_nli_li_ts | 0.91 |
72
- | add_one_rte | 0.91 |
73
- | cycic_classification | 0.83 |
74
- | lingnli | 0.82 |
75
- | monotonicity-entailment | 0.95 |
76
- | scinli | 0.79 |
77
- | naturallogic | 0.91 |
78
- | syntactic-augmentation-nli | 0.95 |
79
- | autotnli | 0.92 |
80
- | defeasible-nli/atomic | 0.76 |
81
- | defeasible-nli/snli | 0.79 |
82
- | help-nli | 0.91 |
83
- | nli-veridicality-transitivity | 0.99 |
84
- | lonli | 0.99 |
85
- | dadc-limit-nli | 0.67 |
86
- | folio | 0.59 |
87
- | tomi-nli | 0.53 |
88
- | temporal-nli | 0.92 |
89
- | counterfactually-augmented-snli | 0.74 |
90
- | cnli | 0.81 |
91
- | logiqa-2.0-nli | 0.57 |
92
- | mindgames | 0.94 |
93
- | ConTRoL-nli | 0.65 |
94
- | logical-fallacy | 0.31 |
95
- | conceptrules_v2 | 0.99 |
96
- | zero-shot-label-nli | 0.74 |
97
- | scone | 0.97 |
98
- | monli | 0.98 |
99
- | SpaceNLI | 1 |
100
- | propsegment/nli | 0.91 |
101
- | SDOH-NLI | 1 |
102
- | scifact_entailment | 0.78 |
103
- | AdjectiveScaleProbe-nli | 0.99 |
104
- | resnli | 0.99 |
105
- | semantic_fragments_nli | 0.99 |
106
- | dataset_train_nli | 0.88 |
107
- | ruletaker | 0.91 |
108
- | PARARULE-Plus | 1 |
109
- | logical-entailment | 0.73 |
110
- | nope | 0.54 |
111
- | LogicNLI | 0.65 |
112
- | contract-nli/contractnli_a/seg | 0.87 |
113
- | contract-nli/contractnli_b/full | 0.78 |
114
- | nli4ct_semeval2024 | 0.6 |
115
- | biosift-nli | 0.88 |
116
- | SIGA-nli | 0.54 |
117
- | FOL-nli | 0.71 |
118
- | doc-nli | 0.82 |
119
- | mctest-nli | 0.89 |
120
- | idioms-nli | 0.86 |
121
- | lifecycle-entailment | 0.71 |
122
- | MSciNLI | 0.82 |
123
- | hover-3way/nli | 0.9 |
124
- | seahorse_summarization_evaluation | 0.82 |
125
- | babi_nli | 0.94 |
126
- | gen_debiased_nli | 0.9 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  # Usage
129
 
 
19
 
20
  ModernBERT multi-task fine-tuned on tasksource NLI tasks, including MNLI, ANLI, SICK, WANLI, doc-nli, LingNLI, FOLIO, FOL-NLI, LogicNLI, Label-NLI (and all datasets in the table below).
21
  This is the equivalent of an "instruct" version.
22
+ The model was trained for 200k steps on an Nvidia A30 GPU.
23
 
 
24
 
25
+
26
+ | test_name | test_accuracy |
27
+ |:--------------------------------------|----------------:|
28
+ | glue/mnli | 0.87 |
29
+ | glue/qnli | 0.93 |
30
+ | glue/rte | 0.85 |
31
+ | glue/mrpc | 0.87 |
32
+ | glue/qqp | 0.9 |
33
+ | glue/cola | 0.86 |
34
+ | glue/sst2 | 0.96 |
35
+ | super_glue/boolq | 0.64 |
36
+ | super_glue/cb | 0.89 |
37
+ | super_glue/multirc | 0.82 |
38
+ | super_glue/wic | 0.67 |
39
+ | super_glue/axg | 0.89 |
40
+ | anli/a1 | 0.66 |
41
+ | anli/a2 | 0.49 |
42
+ | anli/a3 | 0.44 |
43
+ | sick/label | 0.93 |
44
+ | sick/entailment_AB | 0.91 |
45
+ | snli | 0.83 |
46
+ | scitail/snli_format | 0.94 |
47
+ | hans | 1 |
48
+ | WANLI | 0.74 |
49
+ | recast/recast_ner | 0.87 |
50
+ | recast/recast_sentiment | 0.99 |
51
+ | recast/recast_verbnet | 0.88 |
52
+ | recast/recast_megaveridicality | 0.88 |
53
+ | recast/recast_verbcorner | 0.94 |
54
+ | recast/recast_kg_relations | 0.91 |
55
+ | recast/recast_factuality | 0.94 |
56
+ | recast/recast_puns | 0.96 |
57
+ | probability_words_nli/reasoning_1hop | 0.99 |
58
+ | probability_words_nli/usnli | 0.72 |
59
+ | probability_words_nli/reasoning_2hop | 0.98 |
60
+ | nan-nli | 0.85 |
61
+ | nli_fever | 0.78 |
62
+ | breaking_nli | 0.99 |
63
+ | conj_nli | 0.74 |
64
+ | fracas | 0.86 |
65
+ | dialogue_nli | 0.93 |
66
+ | mpe | 0.74 |
67
+ | dnc | 0.92 |
68
+ | recast_white/fnplus | 0.82 |
69
+ | recast_white/sprl | 0.9 |
70
+ | recast_white/dpr | 0.68 |
71
+ | robust_nli/IS_CS | 0.79 |
72
+ | robust_nli/LI_LI | 0.99 |
73
+ | robust_nli/ST_WO | 0.85 |
74
+ | robust_nli/PI_SP | 0.74 |
75
+ | robust_nli/PI_CD | 0.8 |
76
+ | robust_nli/ST_SE | 0.81 |
77
+ | robust_nli/ST_NE | 0.86 |
78
+ | robust_nli/ST_LM | 0.87 |
79
+ | robust_nli_is_sd | 1 |
80
+ | robust_nli_li_ts | 0.89 |
81
+ | add_one_rte | 0.94 |
82
+ | paws/labeled_final | 0.95 |
83
+ | pragmeval/pdtb | 0.64 |
84
+ | lex_glue/scotus | 0.55 |
85
+ | lex_glue/ledgar | 0.8 |
86
+ | dynasent/dynabench.dynasent.r1.all/r1 | 0.81 |
87
+ | dynasent/dynabench.dynasent.r2.all/r2 | 0.75 |
88
+ | cycic_classification | 0.9 |
89
+ | lingnli | 0.84 |
90
+ | monotonicity-entailment | 0.97 |
91
+ | scinli | 0.8 |
92
+ | naturallogic | 0.96 |
93
+ | dynahate | 0.78 |
94
+ | syntactic-augmentation-nli | 0.92 |
95
+ | autotnli | 0.94 |
96
+ | defeasible-nli/atomic | 0.81 |
97
+ | defeasible-nli/snli | 0.78 |
98
+ | help-nli | 0.96 |
99
+ | nli-veridicality-transitivity | 0.98 |
100
+ | lonli | 0.97 |
101
+ | dadc-limit-nli | 0.69 |
102
+ | folio | 0.66 |
103
+ | tomi-nli | 0.48 |
104
+ | puzzte | 0.6 |
105
+ | temporal-nli | 0.92 |
106
+ | counterfactually-augmented-snli | 0.79 |
107
+ | cnli | 0.87 |
108
+ | boolq-natural-perturbations | 0.66 |
109
+ | equate | 0.63 |
110
+ | logiqa-2.0-nli | 0.52 |
111
+ | mindgames | 0.96 |
112
+ | ConTRoL-nli | 0.67 |
113
+ | logical-fallacy | 0.37 |
114
+ | cladder | 0.87 |
115
+ | conceptrules_v2 | 1 |
116
+ | zero-shot-label-nli | 0.82 |
117
+ | scone | 0.98 |
118
+ | monli | 1 |
119
+ | SpaceNLI | 1 |
120
+ | propsegment/nli | 0.88 |
121
+ | FLD.v2/default | 0.91 |
122
+ | FLD.v2/star | 0.76 |
123
+ | SDOH-NLI | 0.98 |
124
+ | scifact_entailment | 0.84 |
125
+ | AdjectiveScaleProbe-nli | 0.99 |
126
+ | resnli | 1 |
127
+ | semantic_fragments_nli | 0.99 |
128
+ | dataset_train_nli | 0.94 |
129
+ | nlgraph | 0.94 |
130
+ | ruletaker | 0.99 |
131
+ | PARARULE-Plus | 1 |
132
+ | logical-entailment | 0.86 |
133
+ | nope | 0.44 |
134
+ | LogicNLI | 0.86 |
135
+ | contract-nli/contractnli_a/seg | 0.87 |
136
+ | contract-nli/contractnli_b/full | 0.79 |
137
+ | nli4ct_semeval2024 | 0.67 |
138
+ | biosift-nli | 0.92 |
139
+ | SIGA-nli | 0.53 |
140
+ | FOL-nli | 0.8 |
141
+ | doc-nli | 0.77 |
142
+ | mctest-nli | 0.87 |
143
+ | natural-language-satisfiability | 0.9 |
144
+ | idioms-nli | 0.81 |
145
+ | lifecycle-entailment | 0.78 |
146
+ | MSciNLI | 0.85 |
147
+ | hover-3way/nli | 0.88 |
148
+ | seahorse_summarization_evaluation | 0.73 |
149
+ | missing-item-prediction/contrastive | 0.79 |
150
+ | Pol_NLI | 0.89 |
151
+ | synthetic-retrieval-NLI/count | 0.64 |
152
+ | synthetic-retrieval-NLI/position | 0.89 |
153
+ | synthetic-retrieval-NLI/binary | 0.91 |
154
+ | babi_nli | 0.97 |
155
+ | gen_debiased_nli | 0.91 |
156
+
157
 
158
  # Usage
159