陈俊杰
committed on
Commit
·
692a14c
1
Parent(s):
2bb6d3b
cjj-leaderboard
Browse files
app.py
CHANGED
@@ -202,36 +202,36 @@ This leaderboard is used to show the performance of the <strong>automatic evalua
|
|
202 |
('', 'teamId'),
|
203 |
('', 'methods'),
|
204 |
('', 'overall'),
|
205 |
-
('Dialogue Generation', 'accuracy'),
|
206 |
-
('Dialogue Generation', 'kendall\'s tau'),
|
207 |
-
('Dialogue Generation', 'spearman'),
|
208 |
-
('Text Expansion', 'accuracy'),
|
209 |
-
('Text Expansion', 'kendall\'s tau'),
|
210 |
-
('Text Expansion', 'spearman'),
|
211 |
-
('Summary Generation', 'accuracy'),
|
212 |
-
('Summary Generation', 'kendall\'s tau'),
|
213 |
-
('Summary Generation', 'spearman'),
|
214 |
-
('Non-Factoid QA', 'accuracy'),
|
215 |
-
('Non-Factoid QA', 'kendall\'s tau'),
|
216 |
-
('Non-Factoid QA', 'spearman')
|
217 |
])
|
218 |
|
219 |
data = {
|
220 |
('', 'teamId'): ['baseline', 'baseline', 'baseline', 'baseline'],
|
221 |
('', 'methods'): ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
|
222 |
('', 'overall'): [0,0,0,0],
|
223 |
-
('Dialogue Generation', 'accuracy'): [0.5806, 0.5483, 0.6001, 0.6472],
|
224 |
-
('Dialogue Generation', 'kendall\'s tau'): [0.3243, 0.1739, 0.3042, 0.4167],
|
225 |
-
('Dialogue Generation', 'spearman'): [0.3505, 0.1857, 0.3264, 0.4512],
|
226 |
-
('Text Expansion', 'accuracy'): [0.5107, 0.5050, 0.5461, 0.5581],
|
227 |
-
('Text Expansion', 'kendall\'s tau'): [0.1281, 0.0635, 0.2716, 0.3864],
|
228 |
-
('Text Expansion', 'spearman'): [0.1352, 0.0667, 0.2867, 0.4157],
|
229 |
-
('Summary Generation', 'accuracy'): [0.6504, 0.6014, 0.7162, 0.7441],
|
230 |
-
('Summary Generation', 'kendall\'s tau'): [0.3957, 0.2688, 0.5092, 0.5001],
|
231 |
-
('Summary Generation', 'spearman'): [0.4188, 0.2817, 0.5403, 0.5405],
|
232 |
-
('Non-Factoid QA', 'accuracy'): [0.5935, 0.5817, 0.7000, 0.7203],
|
233 |
-
('Non-Factoid QA', 'kendall\'s tau'): [0.2332, 0.2389, 0.4440, 0.4235],
|
234 |
-
('Non-Factoid QA', 'spearman'): [0.2443, 0.2492, 0.4630, 0.4511]
|
235 |
}
|
236 |
# overall = [0, 0, 0, 0]
|
237 |
# for d in data:
|
|
|
202 |
('', 'teamId'),
|
203 |
('', 'methods'),
|
204 |
('', 'overall'),
|
205 |
+
# ('Dialogue Generation', 'accuracy'),
|
206 |
+
# ('Dialogue Generation', 'kendall\'s tau'),
|
207 |
+
# ('Dialogue Generation', 'spearman'),
|
208 |
+
# ('Text Expansion', 'accuracy'),
|
209 |
+
# ('Text Expansion', 'kendall\'s tau'),
|
210 |
+
# ('Text Expansion', 'spearman'),
|
211 |
+
# ('Summary Generation', 'accuracy'),
|
212 |
+
# ('Summary Generation', 'kendall\'s tau'),
|
213 |
+
# ('Summary Generation', 'spearman'),
|
214 |
+
# ('Non-Factoid QA', 'accuracy'),
|
215 |
+
# ('Non-Factoid QA', 'kendall\'s tau'),
|
216 |
+
# ('Non-Factoid QA', 'spearman')
|
217 |
])
|
218 |
|
219 |
data = {
|
220 |
('', 'teamId'): ['baseline', 'baseline', 'baseline', 'baseline'],
|
221 |
('', 'methods'): ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
|
222 |
('', 'overall'): [0,0,0,0],
|
223 |
+
# ('Dialogue Generation', 'accuracy'): [0.5806, 0.5483, 0.6001, 0.6472],
|
224 |
+
# ('Dialogue Generation', 'kendall\'s tau'): [0.3243, 0.1739, 0.3042, 0.4167],
|
225 |
+
# ('Dialogue Generation', 'spearman'): [0.3505, 0.1857, 0.3264, 0.4512],
|
226 |
+
# ('Text Expansion', 'accuracy'): [0.5107, 0.5050, 0.5461, 0.5581],
|
227 |
+
# ('Text Expansion', 'kendall\'s tau'): [0.1281, 0.0635, 0.2716, 0.3864],
|
228 |
+
# ('Text Expansion', 'spearman'): [0.1352, 0.0667, 0.2867, 0.4157],
|
229 |
+
# ('Summary Generation', 'accuracy'): [0.6504, 0.6014, 0.7162, 0.7441],
|
230 |
+
# ('Summary Generation', 'kendall\'s tau'): [0.3957, 0.2688, 0.5092, 0.5001],
|
231 |
+
# ('Summary Generation', 'spearman'): [0.4188, 0.2817, 0.5403, 0.5405],
|
232 |
+
# ('Non-Factoid QA', 'accuracy'): [0.5935, 0.5817, 0.7000, 0.7203],
|
233 |
+
# ('Non-Factoid QA', 'kendall\'s tau'): [0.2332, 0.2389, 0.4440, 0.4235],
|
234 |
+
# ('Non-Factoid QA', 'spearman'): [0.2443, 0.2492, 0.4630, 0.4511]
|
235 |
}
|
236 |
# overall = [0, 0, 0, 0]
|
237 |
# for d in data:
|