陈俊杰 committed on
Commit
692a14c
·
1 Parent(s): 2bb6d3b

cjj-leaderboard

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -202,36 +202,36 @@ This leaderboard is used to show the performance of the <strong>automatic evalua
202
  ('', 'teamId'),
203
  ('', 'methods'),
204
  ('', 'overall'),
205
- ('Dialogue Generation', 'accuracy'),
206
- ('Dialogue Generation', 'kendall\'s tau'),
207
- ('Dialogue Generation', 'spearman'),
208
- ('Text Expansion', 'accuracy'),
209
- ('Text Expansion', 'kendall\'s tau'),
210
- ('Text Expansion', 'spearman'),
211
- ('Summary Generation', 'accuracy'),
212
- ('Summary Generation', 'kendall\'s tau'),
213
- ('Summary Generation', 'spearman'),
214
- ('Non-Factoid QA', 'accuracy'),
215
- ('Non-Factoid QA', 'kendall\'s tau'),
216
- ('Non-Factoid QA', 'spearman')
217
  ])
218
 
219
  data = {
220
  ('', 'teamId'): ['baseline', 'baseline', 'baseline', 'baseline'],
221
  ('', 'methods'): ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
222
  ('', 'overall'): [0,0,0,0],
223
- ('Dialogue Generation', 'accuracy'): [0.5806, 0.5483, 0.6001, 0.6472],
224
- ('Dialogue Generation', 'kendall\'s tau'): [0.3243, 0.1739, 0.3042, 0.4167],
225
- ('Dialogue Generation', 'spearman'): [0.3505, 0.1857, 0.3264, 0.4512],
226
- ('Text Expansion', 'accuracy'): [0.5107, 0.5050, 0.5461, 0.5581],
227
- ('Text Expansion', 'kendall\'s tau'): [0.1281, 0.0635, 0.2716, 0.3864],
228
- ('Text Expansion', 'spearman'): [0.1352, 0.0667, 0.2867, 0.4157],
229
- ('Summary Generation', 'accuracy'): [0.6504, 0.6014, 0.7162, 0.7441],
230
- ('Summary Generation', 'kendall\'s tau'): [0.3957, 0.2688, 0.5092, 0.5001],
231
- ('Summary Generation', 'spearman'): [0.4188, 0.2817, 0.5403, 0.5405],
232
- ('Non-Factoid QA', 'accuracy'): [0.5935, 0.5817, 0.7000, 0.7203],
233
- ('Non-Factoid QA', 'kendall\'s tau'): [0.2332, 0.2389, 0.4440, 0.4235],
234
- ('Non-Factoid QA', 'spearman'): [0.2443, 0.2492, 0.4630, 0.4511]
235
  }
236
  # overall = [0, 0, 0, 0]
237
  # for d in data:
 
202
  ('', 'teamId'),
203
  ('', 'methods'),
204
  ('', 'overall'),
205
+ # ('Dialogue Generation', 'accuracy'),
206
+ # ('Dialogue Generation', 'kendall\'s tau'),
207
+ # ('Dialogue Generation', 'spearman'),
208
+ # ('Text Expansion', 'accuracy'),
209
+ # ('Text Expansion', 'kendall\'s tau'),
210
+ # ('Text Expansion', 'spearman'),
211
+ # ('Summary Generation', 'accuracy'),
212
+ # ('Summary Generation', 'kendall\'s tau'),
213
+ # ('Summary Generation', 'spearman'),
214
+ # ('Non-Factoid QA', 'accuracy'),
215
+ # ('Non-Factoid QA', 'kendall\'s tau'),
216
+ # ('Non-Factoid QA', 'spearman')
217
  ])
218
 
219
  data = {
220
  ('', 'teamId'): ['baseline', 'baseline', 'baseline', 'baseline'],
221
  ('', 'methods'): ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o-mini"],
222
  ('', 'overall'): [0,0,0,0],
223
+ # ('Dialogue Generation', 'accuracy'): [0.5806, 0.5483, 0.6001, 0.6472],
224
+ # ('Dialogue Generation', 'kendall\'s tau'): [0.3243, 0.1739, 0.3042, 0.4167],
225
+ # ('Dialogue Generation', 'spearman'): [0.3505, 0.1857, 0.3264, 0.4512],
226
+ # ('Text Expansion', 'accuracy'): [0.5107, 0.5050, 0.5461, 0.5581],
227
+ # ('Text Expansion', 'kendall\'s tau'): [0.1281, 0.0635, 0.2716, 0.3864],
228
+ # ('Text Expansion', 'spearman'): [0.1352, 0.0667, 0.2867, 0.4157],
229
+ # ('Summary Generation', 'accuracy'): [0.6504, 0.6014, 0.7162, 0.7441],
230
+ # ('Summary Generation', 'kendall\'s tau'): [0.3957, 0.2688, 0.5092, 0.5001],
231
+ # ('Summary Generation', 'spearman'): [0.4188, 0.2817, 0.5403, 0.5405],
232
+ # ('Non-Factoid QA', 'accuracy'): [0.5935, 0.5817, 0.7000, 0.7203],
233
+ # ('Non-Factoid QA', 'kendall\'s tau'): [0.2332, 0.2389, 0.4440, 0.4235],
234
+ # ('Non-Factoid QA', 'spearman'): [0.2443, 0.2492, 0.4630, 0.4511]
235
  }
236
  # overall = [0, 0, 0, 0]
237
  # for d in data: