自良 commited on
Commit
512e3dd
·
1 Parent(s): 6ce1f2f

update leaderboard

Browse files
arena_elo/cut_off_date.txt CHANGED
@@ -1 +1 @@
1
- 20250107
 
1
+ 20250108
arena_elo/elo_rating/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/__init__.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/__init__.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/basic_stats.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/basic_stats.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/basic_stats.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/clean_battle_data.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/clean_battle_data.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/clean_battle_data.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/elo_analysis.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/elo_analysis.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/elo_analysis.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/generate_leaderboard.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/generate_leaderboard.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/generate_leaderboard.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/model_registry.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/model_registry.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/model_registry.cpython-310.pyc differ
 
arena_elo/elo_rating/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/arena_elo/elo_rating/__pycache__/utils.cpython-310.pyc and b/arena_elo/elo_rating/__pycache__/utils.cpython-310.pyc differ
 
arena_elo/results/20250108/clean_battle.json ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_a": "GPT-4o + FLUX.1 [dev]",
4
+ "model_b": "ChatDiT",
5
+ "winner": "model_b",
6
+ "judge": "arena_user_127.0.0.1",
7
+ "anony": true,
8
+ "tstamp": 1735030427.6669
9
+ },
10
+ {
11
+ "model_a": "GPT-4o + FLUX.1 [dev]",
12
+ "model_b": "ChatDiT",
13
+ "winner": "model_a",
14
+ "judge": "arena_user_127.0.0.1",
15
+ "anony": true,
16
+ "tstamp": 1735030452.0238
17
+ },
18
+ {
19
+ "model_a": "ChatDiT",
20
+ "model_b": "GPT-4o + FLUX.1 [dev]",
21
+ "winner": "model_a",
22
+ "judge": "arena_user_127.0.0.1",
23
+ "anony": true,
24
+ "tstamp": 1735030464.2602
25
+ },
26
+ {
27
+ "model_a": "ChatDiT",
28
+ "model_b": "GPT-4o + FLUX.1 [dev]",
29
+ "winner": "model_a",
30
+ "judge": "arena_user_127.0.0.1",
31
+ "anony": true,
32
+ "tstamp": 1735030476.2328
33
+ },
34
+ {
35
+ "model_a": "GPT-4o + FLUX.1 [dev]",
36
+ "model_b": "ChatDiT",
37
+ "winner": "tie (bothbad)",
38
+ "judge": "arena_user_127.0.0.1",
39
+ "anony": true,
40
+ "tstamp": 1735030495.2955
41
+ },
42
+ {
43
+ "model_a": "ChatDiT",
44
+ "model_b": "GPT-4o + FLUX.1 [dev]",
45
+ "winner": "tie (bothbad)",
46
+ "judge": "arena_user_127.0.0.1",
47
+ "anony": true,
48
+ "tstamp": 1735030503.418
49
+ },
50
+ {
51
+ "model_a": "ChatDiT",
52
+ "model_b": "GPT-4o + FLUX.1 [dev]",
53
+ "winner": "model_a",
54
+ "judge": "arena_user_127.0.0.1",
55
+ "anony": true,
56
+ "tstamp": 1735030511.3926
57
+ },
58
+ {
59
+ "model_a": "ChatDiT",
60
+ "model_b": "GPT-4o + FLUX.1 [dev]",
61
+ "winner": "tie (bothbad)",
62
+ "judge": "arena_user_127.0.0.1",
63
+ "anony": true,
64
+ "tstamp": 1735034259.9984
65
+ },
66
+ {
67
+ "model_a": "ChatDiT",
68
+ "model_b": "GPT-4o + FLUX.1 [dev]",
69
+ "winner": "model_a",
70
+ "judge": "arena_user_127.0.0.1",
71
+ "anony": true,
72
+ "tstamp": 1735034275.6871
73
+ },
74
+ {
75
+ "model_a": "ChatDiT",
76
+ "model_b": "GPT-4o + FLUX.1 [dev]",
77
+ "winner": "model_a",
78
+ "judge": "arena_user_127.0.0.1",
79
+ "anony": true,
80
+ "tstamp": 1735034284.7354
81
+ },
82
+ {
83
+ "model_a": "GPT-4o + FLUX.1 [dev]",
84
+ "model_b": "ChatDiT",
85
+ "winner": "model_a",
86
+ "judge": "arena_user_127.0.0.1",
87
+ "anony": true,
88
+ "tstamp": 1735034293.468
89
+ },
90
+ {
91
+ "model_a": "ChatDiT",
92
+ "model_b": "GPT-4o + FLUX.1 [dev]",
93
+ "winner": "model_b",
94
+ "judge": "arena_user_127.0.0.1",
95
+ "anony": true,
96
+ "tstamp": 1735034303.2042
97
+ },
98
+ {
99
+ "model_a": "ChatDiT",
100
+ "model_b": "GPT-4o + FLUX.1 [dev]",
101
+ "winner": "model_a",
102
+ "judge": "arena_user_127.0.0.1",
103
+ "anony": true,
104
+ "tstamp": 1735034314.1941
105
+ },
106
+ {
107
+ "model_a": "GPT-4o + FLUX.1 [dev]",
108
+ "model_b": "ChatDiT",
109
+ "winner": "model_a",
110
+ "judge": "arena_user_127.0.0.1",
111
+ "anony": true,
112
+ "tstamp": 1735034326.5092
113
+ },
114
+ {
115
+ "model_a": "GPT-4o + FLUX.1 [dev]",
116
+ "model_b": "ChatDiT",
117
+ "winner": "model_b",
118
+ "judge": "arena_user_127.0.0.1",
119
+ "anony": true,
120
+ "tstamp": 1735034331.6963
121
+ },
122
+ {
123
+ "model_a": "GPT-4o + FLUX.1 [dev]",
124
+ "model_b": "ChatDiT",
125
+ "winner": "tie (bothbad)",
126
+ "judge": "arena_user_127.0.0.1",
127
+ "anony": true,
128
+ "tstamp": 1735034336.5346
129
+ },
130
+ {
131
+ "model_a": "ChatDiT",
132
+ "model_b": "GPT-4o + FLUX.1 [dev]",
133
+ "winner": "model_b",
134
+ "judge": "arena_user_127.0.0.1",
135
+ "anony": true,
136
+ "tstamp": 1735034351.9521
137
+ },
138
+ {
139
+ "model_a": "GPT-4o + FLUX.1 [dev]",
140
+ "model_b": "ChatDiT",
141
+ "winner": "model_b",
142
+ "judge": "arena_user_127.0.0.1",
143
+ "anony": true,
144
+ "tstamp": 1735034366.1775
145
+ },
146
+ {
147
+ "model_a": "GPT-4o + FLUX.1 [dev]",
148
+ "model_b": "ChatDiT",
149
+ "winner": "model_a",
150
+ "judge": "arena_user_127.0.0.1",
151
+ "anony": true,
152
+ "tstamp": 1735034380.5877
153
+ },
154
+ {
155
+ "model_a": "ChatDiT",
156
+ "model_b": "GPT-4o + FLUX.1 [dev]",
157
+ "winner": "model_b",
158
+ "judge": "arena_user_127.0.0.1",
159
+ "anony": true,
160
+ "tstamp": 1735034384.3087
161
+ },
162
+ {
163
+ "model_a": "GPT-4o + FLUX.1 [dev]",
164
+ "model_b": "ChatDiT",
165
+ "winner": "model_a",
166
+ "judge": "arena_user_127.0.0.1",
167
+ "anony": true,
168
+ "tstamp": 1735034389.1583
169
+ },
170
+ {
171
+ "model_a": "GPT-4o + FLUX.1 [dev]",
172
+ "model_b": "ChatDiT",
173
+ "winner": "model_b",
174
+ "judge": "arena_user_127.0.0.1",
175
+ "anony": true,
176
+ "tstamp": 1735034405.9359
177
+ },
178
+ {
179
+ "model_a": "GPT-4o + FLUX.1 [dev]",
180
+ "model_b": "ChatDiT",
181
+ "winner": "model_b",
182
+ "judge": "arena_user_127.0.0.1",
183
+ "anony": true,
184
+ "tstamp": 1735034412.3533
185
+ },
186
+ {
187
+ "model_a": "GPT-4o + FLUX.1 [dev]",
188
+ "model_b": "ChatDiT",
189
+ "winner": "model_a",
190
+ "judge": "arena_user_127.0.0.1",
191
+ "anony": true,
192
+ "tstamp": 1735034419.0118
193
+ },
194
+ {
195
+ "model_a": "GPT-4o + FLUX.1 [dev]",
196
+ "model_b": "ChatDiT",
197
+ "winner": "model_b",
198
+ "judge": "arena_user_127.0.0.1",
199
+ "anony": true,
200
+ "tstamp": 1735034425.6972
201
+ },
202
+ {
203
+ "model_a": "GPT-4o + FLUX.1 [dev]",
204
+ "model_b": "ChatDiT",
205
+ "winner": "model_b",
206
+ "judge": "arena_user_127.0.0.1",
207
+ "anony": true,
208
+ "tstamp": 1735034432.5891
209
+ },
210
+ {
211
+ "model_a": "ChatDiT",
212
+ "model_b": "GPT-4o + FLUX.1 [dev]",
213
+ "winner": "model_a",
214
+ "judge": "arena_user_127.0.0.1",
215
+ "anony": true,
216
+ "tstamp": 1735092762.0
217
+ },
218
+ {
219
+ "model_a": "GPT-4o + FLUX.1 [dev]",
220
+ "model_b": "ChatDiT",
221
+ "winner": "tie (bothbad)",
222
+ "judge": "arena_user_127.0.0.1",
223
+ "anony": true,
224
+ "tstamp": 1735092774.618
225
+ },
226
+ {
227
+ "model_a": "GPT-4o + FLUX.1 [dev]",
228
+ "model_b": "ChatDiT",
229
+ "winner": "model_a",
230
+ "judge": "arena_user_127.0.0.1",
231
+ "anony": true,
232
+ "tstamp": 1735092797.2067
233
+ },
234
+ {
235
+ "model_a": "GPT-4o + FLUX.1 [dev]",
236
+ "model_b": "ChatDiT",
237
+ "winner": "model_b",
238
+ "judge": "arena_user_127.0.0.1",
239
+ "anony": true,
240
+ "tstamp": 1735092804.6699
241
+ },
242
+ {
243
+ "model_a": "GPT-4o + FLUX.1 [dev]",
244
+ "model_b": "ChatDiT",
245
+ "winner": "model_a",
246
+ "judge": "arena_user_127.0.0.1",
247
+ "anony": true,
248
+ "tstamp": 1735092810.2635
249
+ },
250
+ {
251
+ "model_a": "GPT-4o + FLUX.1 [dev]",
252
+ "model_b": "ChatDiT",
253
+ "winner": "model_b",
254
+ "judge": "arena_user_127.0.0.1",
255
+ "anony": true,
256
+ "tstamp": 1735093113.5724
257
+ },
258
+ {
259
+ "model_a": "ChatDiT",
260
+ "model_b": "GPT-4o + FLUX.1 [dev]",
261
+ "winner": "tie (bothbad)",
262
+ "judge": "arena_user_127.0.0.1",
263
+ "anony": true,
264
+ "tstamp": 1735093133.2436
265
+ },
266
+ {
267
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
268
+ "model_b": "GPT-4o + OmniGen",
269
+ "winner": "model_a",
270
+ "judge": "arena_user_127.0.0.1",
271
+ "anony": true,
272
+ "tstamp": 1735187628.4881
273
+ },
274
+ {
275
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
276
+ "model_b": "GPT-4o + PixArt-Sigma",
277
+ "winner": "model_b",
278
+ "judge": "arena_user_127.0.0.1",
279
+ "anony": true,
280
+ "tstamp": 1735187649.4872
281
+ },
282
+ {
283
+ "model_a": "GPT-4o + Emu2",
284
+ "model_b": "ChatDiT",
285
+ "winner": "model_a",
286
+ "judge": "arena_user_127.0.0.1",
287
+ "anony": true,
288
+ "tstamp": 1735197562.2637
289
+ },
290
+ {
291
+ "model_a": "GPT-4o + FLUX.1 [dev]",
292
+ "model_b": "GPT-4o + PixArt-Sigma",
293
+ "winner": "model_a",
294
+ "judge": "arena_user_127.0.0.1",
295
+ "anony": true,
296
+ "tstamp": 1735197586.8438
297
+ },
298
+ {
299
+ "model_a": "ChatDiT",
300
+ "model_b": "GPT-4o + FLUX.1 [dev]",
301
+ "winner": "model_a",
302
+ "judge": "arena_user_127.0.0.1",
303
+ "anony": false,
304
+ "tstamp": 1735201758.7145
305
+ },
306
+ {
307
+ "model_a": "GPT-4o + DALLE-3",
308
+ "model_b": "GPT-4o + PixArt-Sigma",
309
+ "winner": "model_b",
310
+ "judge": "arena_user_127.0.0.1",
311
+ "anony": false,
312
+ "tstamp": 1735202083.631
313
+ },
314
+ {
315
+ "model_a": "GPT-4o + DALLE-3",
316
+ "model_b": "GPT-4o + PixArt-Sigma",
317
+ "winner": "model_a",
318
+ "judge": "arena_user_127.0.0.1",
319
+ "anony": false,
320
+ "tstamp": 1735202099.4377
321
+ },
322
+ {
323
+ "model_a": "GPT-4o + OmniGen",
324
+ "model_b": "ChatDiT",
325
+ "winner": "model_b",
326
+ "judge": "arena_user_127.0.0.1",
327
+ "anony": true,
328
+ "tstamp": 1735202132.8592
329
+ },
330
+ {
331
+ "model_a": "GPT-4o + DALLE-3",
332
+ "model_b": "GPT-4o + PixArt-Sigma",
333
+ "winner": "model_b",
334
+ "judge": "arena_user_127.0.0.1",
335
+ "anony": false,
336
+ "tstamp": 1735202545.8694
337
+ },
338
+ {
339
+ "model_a": "GPT-4o + DALLE-3",
340
+ "model_b": "GPT-4o + PixArt-Sigma",
341
+ "winner": "model_a",
342
+ "judge": "arena_user_127.0.0.1",
343
+ "anony": false,
344
+ "tstamp": 1735202565.5723
345
+ },
346
+ {
347
+ "model_a": "GPT-4o + DALLE-3",
348
+ "model_b": "GPT-4o + PixArt-Sigma",
349
+ "winner": "tie (bothbad)",
350
+ "judge": "arena_user_127.0.0.1",
351
+ "anony": false,
352
+ "tstamp": 1735202573.0118
353
+ },
354
+ {
355
+ "model_a": "GPT-4o + DALLE-3",
356
+ "model_b": "GPT-4o + PixArt-Sigma",
357
+ "winner": "tie (bothbad)",
358
+ "judge": "arena_user_127.0.0.1",
359
+ "anony": false,
360
+ "tstamp": 1735203523.809
361
+ },
362
+ {
363
+ "model_a": "GPT-4o + OmniGen",
364
+ "model_b": "GPT-4o + DALLE-3",
365
+ "winner": "model_b",
366
+ "judge": "arena_user_127.0.0.1",
367
+ "anony": true,
368
+ "tstamp": 1735205600.7414
369
+ },
370
+ {
371
+ "model_a": "ChatDiT",
372
+ "model_b": "GPT-4o + DALLE-3",
373
+ "winner": "model_a",
374
+ "judge": "arena_user_127.0.0.1",
375
+ "anony": true,
376
+ "tstamp": 1735207454.8251
377
+ },
378
+ {
379
+ "model_a": "GPT-4o + OmniGen",
380
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
381
+ "winner": "model_b",
382
+ "judge": "arena_user_127.0.0.1",
383
+ "anony": true,
384
+ "tstamp": 1735207466.0131
385
+ },
386
+ {
387
+ "model_a": "GPT-4o + DALLE-3",
388
+ "model_b": "GPT-4o + Emu2",
389
+ "winner": "model_b",
390
+ "judge": "arena_user_127.0.0.1",
391
+ "anony": true,
392
+ "tstamp": 1735215923.1589
393
+ },
394
+ {
395
+ "model_a": "GPT-4o + PixArt-Sigma",
396
+ "model_b": "GPT-4o + DALLE-3",
397
+ "winner": "model_a",
398
+ "judge": "arena_user_127.0.0.1",
399
+ "anony": true,
400
+ "tstamp": 1735215935.7597
401
+ },
402
+ {
403
+ "model_a": "GPT-4o + OmniGen",
404
+ "model_b": "GPT-4o + PixArt-Sigma",
405
+ "winner": "tie (bothbad)",
406
+ "judge": "arena_user_127.0.0.1",
407
+ "anony": true,
408
+ "tstamp": 1735215942.7093
409
+ },
410
+ {
411
+ "model_a": "GPT-4o + PixArt-Sigma",
412
+ "model_b": "GPT-4o + OmniGen",
413
+ "winner": "model_a",
414
+ "judge": "arena_user_127.0.0.1",
415
+ "anony": true,
416
+ "tstamp": 1735215949.7965
417
+ },
418
+ {
419
+ "model_a": "GPT-4o + DALLE-3",
420
+ "model_b": "ChatDiT",
421
+ "winner": "model_b",
422
+ "judge": "arena_user_127.0.0.1",
423
+ "anony": true,
424
+ "tstamp": 1735215962.6898
425
+ },
426
+ {
427
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
428
+ "model_b": "GPT-4o + DALLE-3",
429
+ "winner": "tie (bothbad)",
430
+ "judge": "arena_user_127.0.0.1",
431
+ "anony": true,
432
+ "tstamp": 1735215968.9052
433
+ },
434
+ {
435
+ "model_a": "GPT-4o + FLUX.1 [dev]",
436
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
437
+ "winner": "tie (bothbad)",
438
+ "judge": "arena_user_127.0.0.1",
439
+ "anony": true,
440
+ "tstamp": 1735215976.5079
441
+ },
442
+ {
443
+ "model_a": "GPT-4o + Emu2",
444
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
445
+ "winner": "model_b",
446
+ "judge": "arena_user_127.0.0.1",
447
+ "anony": true,
448
+ "tstamp": 1735215982.9709
449
+ },
450
+ {
451
+ "model_a": "ChatDiT",
452
+ "model_b": "GPT-4o + PixArt-Sigma",
453
+ "winner": "model_a",
454
+ "judge": "arena_user_127.0.0.1",
455
+ "anony": true,
456
+ "tstamp": 1735215993.2305
457
+ },
458
+ {
459
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
460
+ "model_b": "GPT-4o + FLUX.1 [dev]",
461
+ "winner": "tie (bothbad)",
462
+ "judge": "arena_user_127.0.0.1",
463
+ "anony": true,
464
+ "tstamp": 1735215999.8713
465
+ },
466
+ {
467
+ "model_a": "GPT-4o + PixArt-Sigma",
468
+ "model_b": "GPT-4o + FLUX.1 [dev]",
469
+ "winner": "model_b",
470
+ "judge": "arena_user_127.0.0.1",
471
+ "anony": true,
472
+ "tstamp": 1735216012.8216
473
+ },
474
+ {
475
+ "model_a": "ChatDiT",
476
+ "model_b": "GPT-4o + PixArt-Sigma",
477
+ "winner": "model_a",
478
+ "judge": "arena_user_127.0.0.1",
479
+ "anony": true,
480
+ "tstamp": 1735216021.653
481
+ },
482
+ {
483
+ "model_a": "GPT-4o + PixArt-Sigma",
484
+ "model_b": "GPT-4o + OmniGen",
485
+ "winner": "model_b",
486
+ "judge": "arena_user_127.0.0.1",
487
+ "anony": true,
488
+ "tstamp": 1735286354.5764
489
+ },
490
+ {
491
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
492
+ "model_b": "ChatDiT",
493
+ "winner": "tie (bothbad)",
494
+ "judge": "arena_user_127.0.0.1",
495
+ "anony": true,
496
+ "tstamp": 1735286365.2329
497
+ },
498
+ {
499
+ "model_a": "GPT-4o + Emu2",
500
+ "model_b": "ChatDiT",
501
+ "winner": "model_a",
502
+ "judge": "arena_user_127.0.0.1",
503
+ "anony": true,
504
+ "tstamp": 1735286374.6751
505
+ },
506
+ {
507
+ "model_a": "GPT-4o + FLUX.1 [dev]",
508
+ "model_b": "GPT-4o + Emu2",
509
+ "winner": "model_a",
510
+ "judge": "arena_user_127.0.0.1",
511
+ "anony": true,
512
+ "tstamp": 1735286382.1211
513
+ },
514
+ {
515
+ "model_a": "GPT-4o + PixArt-Sigma",
516
+ "model_b": "GPT-4o + OmniGen",
517
+ "winner": "model_a",
518
+ "judge": "arena_user_127.0.0.1",
519
+ "anony": true,
520
+ "tstamp": 1735288723.7052
521
+ },
522
+ {
523
+ "model_a": "GPT-4o + FLUX.1 [dev]",
524
+ "model_b": "GPT-4o + DALLE-3",
525
+ "winner": "model_a",
526
+ "judge": "arena_user_127.0.0.1",
527
+ "anony": true,
528
+ "tstamp": 1735288729.3576
529
+ },
530
+ {
531
+ "model_a": "GPT-4o + PixArt-Sigma",
532
+ "model_b": "GPT-4o + OmniGen",
533
+ "winner": "model_a",
534
+ "judge": "arena_user_127.0.0.1",
535
+ "anony": true,
536
+ "tstamp": 1735288749.1708
537
+ },
538
+ {
539
+ "model_a": "GPT-4o + FLUX.1 [dev]",
540
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
541
+ "winner": "model_a",
542
+ "judge": "arena_user_127.0.0.1",
543
+ "anony": true,
544
+ "tstamp": 1736305459.7554
545
+ },
546
+ {
547
+ "model_a": "GPT-4o + Emu2",
548
+ "model_b": "GPT-4o + DALLE-3",
549
+ "winner": "model_b",
550
+ "judge": "arena_user_127.0.0.1",
551
+ "anony": true,
552
+ "tstamp": 1736305568.3703
553
+ },
554
+ {
555
+ "model_a": "GPT-4o + Emu2",
556
+ "model_b": "GPT-4o + PixArt-Sigma",
557
+ "winner": "model_b",
558
+ "judge": "arena_user_127.0.0.1",
559
+ "anony": true,
560
+ "tstamp": 1736305578.3648
561
+ },
562
+ {
563
+ "model_a": "GPT-4o + OmniGen",
564
+ "model_b": "GPT-4o + PixArt-Sigma",
565
+ "winner": "model_b",
566
+ "judge": "arena_user_10.16.39.228",
567
+ "anony": true,
568
+ "tstamp": 1736316463.4086
569
+ },
570
+ {
571
+ "model_a": "GPT-4o + OmniGen",
572
+ "model_b": "GPT-4o + PixArt-Sigma",
573
+ "winner": "model_a",
574
+ "judge": "arena_user_10.16.30.109",
575
+ "anony": true,
576
+ "tstamp": 1736316525.3474
577
+ },
578
+ {
579
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
580
+ "model_b": "GPT-4o + DALLE-3",
581
+ "winner": "tie (bothbad)",
582
+ "judge": "arena_user_10.16.9.166",
583
+ "anony": false,
584
+ "tstamp": 1736317079.2219
585
+ },
586
+ {
587
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
588
+ "model_b": "GPT-4o + OmniGen",
589
+ "winner": "model_a",
590
+ "judge": "arena_user_10.16.39.228",
591
+ "anony": true,
592
+ "tstamp": 1736317103.5229
593
+ },
594
+ {
595
+ "model_a": "ChatDiT",
596
+ "model_b": "GPT-4o + PixArt-Sigma",
597
+ "winner": "model_b",
598
+ "judge": "arena_user_10.16.9.166",
599
+ "anony": true,
600
+ "tstamp": 1736317151.2313
601
+ },
602
+ {
603
+ "model_a": "GPT-4o + Emu2",
604
+ "model_b": "GPT-4o + PixArt-Sigma",
605
+ "winner": "model_b",
606
+ "judge": "arena_user_10.16.24.150",
607
+ "anony": true,
608
+ "tstamp": 1736317260.068
609
+ },
610
+ {
611
+ "model_a": "GPT-4o + PixArt-Sigma",
612
+ "model_b": "GPT-4o + OmniGen",
613
+ "winner": "model_b",
614
+ "judge": "arena_user_172.18.13.178",
615
+ "anony": true,
616
+ "tstamp": 1736320695.0812
617
+ },
618
+ {
619
+ "model_a": "ChatDiT",
620
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
621
+ "winner": "model_a",
622
+ "judge": "arena_user_10.16.43.67",
623
+ "anony": true,
624
+ "tstamp": 1736321735.4094
625
+ },
626
+ {
627
+ "model_a": "GPT-4o + PixArt-Sigma",
628
+ "model_b": "GPT-4o + FLUX.1 [dev]",
629
+ "winner": "model_a",
630
+ "judge": "arena_user_10.16.24.150",
631
+ "anony": true,
632
+ "tstamp": 1736335598.6764
633
+ }
634
+ ]
arena_elo/results/20250108/elo_results.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcf8ae6947143cd90ff0649dfa15dbfb465397de67be4e0b39ff34e0f705df3
3
+ size 57457
arena_elo/results/20250108/leaderboard.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ key,Model,Arena Elo rating (anony),Arena Elo rating (full),license,creator,link
2
+ ChatDiT,ChatDiT,1141.9082940955832,1141.3534939692347,MIT License,Tongyi Lab,https://github.com/ali-vilab/ChatDiT
3
+ GPT-4o + FLUX.1 [dev],GPT-4o + FLUX.1 [dev],1106.4413262289752,1096.9228824729828,FLUX.1 [dev] Non-Commercial License,Black Forest Labs,https://huggingface.co/black-forest-labs/FLUX.1-dev
4
+ GPT-4o + Stable Diffusion 3 Medium,GPT-4o + Stable Diffusion 3 Medium,1037.4899722855018,1026.2775791477911,Stability AI Community License,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
5
+ GPT-4o + PixArt-Sigma,GPT-4o + PixArt-Sigma,1030.2322549298108,1003.2938185806802,CreativeML Open RAIL++-M License,Huawei Noah's Ark Lab,https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
6
+ GPT-4o + Emu2,GPT-4o + Emu2,941.1199462147184,946.5946854676025,Apache License 2.0,BAAI,https://huggingface.co/BAAI/Emu2
7
+ GPT-4o + DALLE-3,GPT-4o + DALLE-3,884.7999841743839,941.2186221773824,OpenAI Terms of Use,OpenAI,https://openai.com/index/dall-e-3/
8
+ GPT-4o + OmniGen,GPT-4o + OmniGen,858.0082220710267,844.3389181843263,MIT License,BAAI,https://huggingface.co/spaces/Shitao/OmniGen
arena_elo/results/latest/clean_battle.json CHANGED
@@ -1,6 +1,6 @@
1
  [
2
  {
3
- "model_a": "FLUX-1",
4
  "model_b": "ChatDiT",
5
  "winner": "model_b",
6
  "judge": "arena_user_127.0.0.1",
@@ -8,7 +8,7 @@
8
  "tstamp": 1735030427.6669
9
  },
10
  {
11
- "model_a": "FLUX-1",
12
  "model_b": "ChatDiT",
13
  "winner": "model_a",
14
  "judge": "arena_user_127.0.0.1",
@@ -17,7 +17,7 @@
17
  },
18
  {
19
  "model_a": "ChatDiT",
20
- "model_b": "FLUX-1",
21
  "winner": "model_a",
22
  "judge": "arena_user_127.0.0.1",
23
  "anony": true,
@@ -25,14 +25,14 @@
25
  },
26
  {
27
  "model_a": "ChatDiT",
28
- "model_b": "FLUX-1",
29
  "winner": "model_a",
30
  "judge": "arena_user_127.0.0.1",
31
  "anony": true,
32
  "tstamp": 1735030476.2328
33
  },
34
  {
35
- "model_a": "FLUX-1",
36
  "model_b": "ChatDiT",
37
  "winner": "tie (bothbad)",
38
  "judge": "arena_user_127.0.0.1",
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "model_a": "ChatDiT",
44
- "model_b": "FLUX-1",
45
  "winner": "tie (bothbad)",
46
  "judge": "arena_user_127.0.0.1",
47
  "anony": true,
@@ -49,7 +49,7 @@
49
  },
50
  {
51
  "model_a": "ChatDiT",
52
- "model_b": "FLUX-1",
53
  "winner": "model_a",
54
  "judge": "arena_user_127.0.0.1",
55
  "anony": true,
@@ -57,7 +57,7 @@
57
  },
58
  {
59
  "model_a": "ChatDiT",
60
- "model_b": "FLUX-1",
61
  "winner": "tie (bothbad)",
62
  "judge": "arena_user_127.0.0.1",
63
  "anony": true,
@@ -65,7 +65,7 @@
65
  },
66
  {
67
  "model_a": "ChatDiT",
68
- "model_b": "FLUX-1",
69
  "winner": "model_a",
70
  "judge": "arena_user_127.0.0.1",
71
  "anony": true,
@@ -73,14 +73,14 @@
73
  },
74
  {
75
  "model_a": "ChatDiT",
76
- "model_b": "FLUX-1",
77
  "winner": "model_a",
78
  "judge": "arena_user_127.0.0.1",
79
  "anony": true,
80
  "tstamp": 1735034284.7354
81
  },
82
  {
83
- "model_a": "FLUX-1",
84
  "model_b": "ChatDiT",
85
  "winner": "model_a",
86
  "judge": "arena_user_127.0.0.1",
@@ -89,7 +89,7 @@
89
  },
90
  {
91
  "model_a": "ChatDiT",
92
- "model_b": "FLUX-1",
93
  "winner": "model_b",
94
  "judge": "arena_user_127.0.0.1",
95
  "anony": true,
@@ -97,14 +97,14 @@
97
  },
98
  {
99
  "model_a": "ChatDiT",
100
- "model_b": "FLUX-1",
101
  "winner": "model_a",
102
  "judge": "arena_user_127.0.0.1",
103
  "anony": true,
104
  "tstamp": 1735034314.1941
105
  },
106
  {
107
- "model_a": "FLUX-1",
108
  "model_b": "ChatDiT",
109
  "winner": "model_a",
110
  "judge": "arena_user_127.0.0.1",
@@ -112,7 +112,7 @@
112
  "tstamp": 1735034326.5092
113
  },
114
  {
115
- "model_a": "FLUX-1",
116
  "model_b": "ChatDiT",
117
  "winner": "model_b",
118
  "judge": "arena_user_127.0.0.1",
@@ -120,7 +120,7 @@
120
  "tstamp": 1735034331.6963
121
  },
122
  {
123
- "model_a": "FLUX-1",
124
  "model_b": "ChatDiT",
125
  "winner": "tie (bothbad)",
126
  "judge": "arena_user_127.0.0.1",
@@ -129,14 +129,14 @@
129
  },
130
  {
131
  "model_a": "ChatDiT",
132
- "model_b": "FLUX-1",
133
  "winner": "model_b",
134
  "judge": "arena_user_127.0.0.1",
135
  "anony": true,
136
  "tstamp": 1735034351.9521
137
  },
138
  {
139
- "model_a": "FLUX-1",
140
  "model_b": "ChatDiT",
141
  "winner": "model_b",
142
  "judge": "arena_user_127.0.0.1",
@@ -144,7 +144,7 @@
144
  "tstamp": 1735034366.1775
145
  },
146
  {
147
- "model_a": "FLUX-1",
148
  "model_b": "ChatDiT",
149
  "winner": "model_a",
150
  "judge": "arena_user_127.0.0.1",
@@ -153,14 +153,14 @@
153
  },
154
  {
155
  "model_a": "ChatDiT",
156
- "model_b": "FLUX-1",
157
  "winner": "model_b",
158
  "judge": "arena_user_127.0.0.1",
159
  "anony": true,
160
  "tstamp": 1735034384.3087
161
  },
162
  {
163
- "model_a": "FLUX-1",
164
  "model_b": "ChatDiT",
165
  "winner": "model_a",
166
  "judge": "arena_user_127.0.0.1",
@@ -168,7 +168,7 @@
168
  "tstamp": 1735034389.1583
169
  },
170
  {
171
- "model_a": "FLUX-1",
172
  "model_b": "ChatDiT",
173
  "winner": "model_b",
174
  "judge": "arena_user_127.0.0.1",
@@ -176,7 +176,7 @@
176
  "tstamp": 1735034405.9359
177
  },
178
  {
179
- "model_a": "FLUX-1",
180
  "model_b": "ChatDiT",
181
  "winner": "model_b",
182
  "judge": "arena_user_127.0.0.1",
@@ -184,7 +184,7 @@
184
  "tstamp": 1735034412.3533
185
  },
186
  {
187
- "model_a": "FLUX-1",
188
  "model_b": "ChatDiT",
189
  "winner": "model_a",
190
  "judge": "arena_user_127.0.0.1",
@@ -192,7 +192,7 @@
192
  "tstamp": 1735034419.0118
193
  },
194
  {
195
- "model_a": "FLUX-1",
196
  "model_b": "ChatDiT",
197
  "winner": "model_b",
198
  "judge": "arena_user_127.0.0.1",
@@ -200,7 +200,7 @@
200
  "tstamp": 1735034425.6972
201
  },
202
  {
203
- "model_a": "FLUX-1",
204
  "model_b": "ChatDiT",
205
  "winner": "model_b",
206
  "judge": "arena_user_127.0.0.1",
@@ -209,14 +209,14 @@
209
  },
210
  {
211
  "model_a": "ChatDiT",
212
- "model_b": "FLUX-1",
213
  "winner": "model_a",
214
  "judge": "arena_user_127.0.0.1",
215
  "anony": true,
216
  "tstamp": 1735092762.0
217
  },
218
  {
219
- "model_a": "FLUX-1",
220
  "model_b": "ChatDiT",
221
  "winner": "tie (bothbad)",
222
  "judge": "arena_user_127.0.0.1",
@@ -224,7 +224,7 @@
224
  "tstamp": 1735092774.618
225
  },
226
  {
227
- "model_a": "FLUX-1",
228
  "model_b": "ChatDiT",
229
  "winner": "model_a",
230
  "judge": "arena_user_127.0.0.1",
@@ -232,7 +232,7 @@
232
  "tstamp": 1735092797.2067
233
  },
234
  {
235
- "model_a": "FLUX-1",
236
  "model_b": "ChatDiT",
237
  "winner": "model_b",
238
  "judge": "arena_user_127.0.0.1",
@@ -240,7 +240,7 @@
240
  "tstamp": 1735092804.6699
241
  },
242
  {
243
- "model_a": "FLUX-1",
244
  "model_b": "ChatDiT",
245
  "winner": "model_a",
246
  "judge": "arena_user_127.0.0.1",
@@ -248,7 +248,7 @@
248
  "tstamp": 1735092810.2635
249
  },
250
  {
251
- "model_a": "FLUX-1",
252
  "model_b": "ChatDiT",
253
  "winner": "model_b",
254
  "judge": "arena_user_127.0.0.1",
@@ -257,7 +257,7 @@
257
  },
258
  {
259
  "model_a": "ChatDiT",
260
- "model_b": "FLUX-1",
261
  "winner": "tie (bothbad)",
262
  "judge": "arena_user_127.0.0.1",
263
  "anony": true,
@@ -558,5 +558,77 @@
558
  "judge": "arena_user_127.0.0.1",
559
  "anony": true,
560
  "tstamp": 1736305578.3648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  }
562
  ]
 
1
  [
2
  {
3
+ "model_a": "GPT-4o + FLUX.1 [dev]",
4
  "model_b": "ChatDiT",
5
  "winner": "model_b",
6
  "judge": "arena_user_127.0.0.1",
 
8
  "tstamp": 1735030427.6669
9
  },
10
  {
11
+ "model_a": "GPT-4o + FLUX.1 [dev]",
12
  "model_b": "ChatDiT",
13
  "winner": "model_a",
14
  "judge": "arena_user_127.0.0.1",
 
17
  },
18
  {
19
  "model_a": "ChatDiT",
20
+ "model_b": "GPT-4o + FLUX.1 [dev]",
21
  "winner": "model_a",
22
  "judge": "arena_user_127.0.0.1",
23
  "anony": true,
 
25
  },
26
  {
27
  "model_a": "ChatDiT",
28
+ "model_b": "GPT-4o + FLUX.1 [dev]",
29
  "winner": "model_a",
30
  "judge": "arena_user_127.0.0.1",
31
  "anony": true,
32
  "tstamp": 1735030476.2328
33
  },
34
  {
35
+ "model_a": "GPT-4o + FLUX.1 [dev]",
36
  "model_b": "ChatDiT",
37
  "winner": "tie (bothbad)",
38
  "judge": "arena_user_127.0.0.1",
 
41
  },
42
  {
43
  "model_a": "ChatDiT",
44
+ "model_b": "GPT-4o + FLUX.1 [dev]",
45
  "winner": "tie (bothbad)",
46
  "judge": "arena_user_127.0.0.1",
47
  "anony": true,
 
49
  },
50
  {
51
  "model_a": "ChatDiT",
52
+ "model_b": "GPT-4o + FLUX.1 [dev]",
53
  "winner": "model_a",
54
  "judge": "arena_user_127.0.0.1",
55
  "anony": true,
 
57
  },
58
  {
59
  "model_a": "ChatDiT",
60
+ "model_b": "GPT-4o + FLUX.1 [dev]",
61
  "winner": "tie (bothbad)",
62
  "judge": "arena_user_127.0.0.1",
63
  "anony": true,
 
65
  },
66
  {
67
  "model_a": "ChatDiT",
68
+ "model_b": "GPT-4o + FLUX.1 [dev]",
69
  "winner": "model_a",
70
  "judge": "arena_user_127.0.0.1",
71
  "anony": true,
 
73
  },
74
  {
75
  "model_a": "ChatDiT",
76
+ "model_b": "GPT-4o + FLUX.1 [dev]",
77
  "winner": "model_a",
78
  "judge": "arena_user_127.0.0.1",
79
  "anony": true,
80
  "tstamp": 1735034284.7354
81
  },
82
  {
83
+ "model_a": "GPT-4o + FLUX.1 [dev]",
84
  "model_b": "ChatDiT",
85
  "winner": "model_a",
86
  "judge": "arena_user_127.0.0.1",
 
89
  },
90
  {
91
  "model_a": "ChatDiT",
92
+ "model_b": "GPT-4o + FLUX.1 [dev]",
93
  "winner": "model_b",
94
  "judge": "arena_user_127.0.0.1",
95
  "anony": true,
 
97
  },
98
  {
99
  "model_a": "ChatDiT",
100
+ "model_b": "GPT-4o + FLUX.1 [dev]",
101
  "winner": "model_a",
102
  "judge": "arena_user_127.0.0.1",
103
  "anony": true,
104
  "tstamp": 1735034314.1941
105
  },
106
  {
107
+ "model_a": "GPT-4o + FLUX.1 [dev]",
108
  "model_b": "ChatDiT",
109
  "winner": "model_a",
110
  "judge": "arena_user_127.0.0.1",
 
112
  "tstamp": 1735034326.5092
113
  },
114
  {
115
+ "model_a": "GPT-4o + FLUX.1 [dev]",
116
  "model_b": "ChatDiT",
117
  "winner": "model_b",
118
  "judge": "arena_user_127.0.0.1",
 
120
  "tstamp": 1735034331.6963
121
  },
122
  {
123
+ "model_a": "GPT-4o + FLUX.1 [dev]",
124
  "model_b": "ChatDiT",
125
  "winner": "tie (bothbad)",
126
  "judge": "arena_user_127.0.0.1",
 
129
  },
130
  {
131
  "model_a": "ChatDiT",
132
+ "model_b": "GPT-4o + FLUX.1 [dev]",
133
  "winner": "model_b",
134
  "judge": "arena_user_127.0.0.1",
135
  "anony": true,
136
  "tstamp": 1735034351.9521
137
  },
138
  {
139
+ "model_a": "GPT-4o + FLUX.1 [dev]",
140
  "model_b": "ChatDiT",
141
  "winner": "model_b",
142
  "judge": "arena_user_127.0.0.1",
 
144
  "tstamp": 1735034366.1775
145
  },
146
  {
147
+ "model_a": "GPT-4o + FLUX.1 [dev]",
148
  "model_b": "ChatDiT",
149
  "winner": "model_a",
150
  "judge": "arena_user_127.0.0.1",
 
153
  },
154
  {
155
  "model_a": "ChatDiT",
156
+ "model_b": "GPT-4o + FLUX.1 [dev]",
157
  "winner": "model_b",
158
  "judge": "arena_user_127.0.0.1",
159
  "anony": true,
160
  "tstamp": 1735034384.3087
161
  },
162
  {
163
+ "model_a": "GPT-4o + FLUX.1 [dev]",
164
  "model_b": "ChatDiT",
165
  "winner": "model_a",
166
  "judge": "arena_user_127.0.0.1",
 
168
  "tstamp": 1735034389.1583
169
  },
170
  {
171
+ "model_a": "GPT-4o + FLUX.1 [dev]",
172
  "model_b": "ChatDiT",
173
  "winner": "model_b",
174
  "judge": "arena_user_127.0.0.1",
 
176
  "tstamp": 1735034405.9359
177
  },
178
  {
179
+ "model_a": "GPT-4o + FLUX.1 [dev]",
180
  "model_b": "ChatDiT",
181
  "winner": "model_b",
182
  "judge": "arena_user_127.0.0.1",
 
184
  "tstamp": 1735034412.3533
185
  },
186
  {
187
+ "model_a": "GPT-4o + FLUX.1 [dev]",
188
  "model_b": "ChatDiT",
189
  "winner": "model_a",
190
  "judge": "arena_user_127.0.0.1",
 
192
  "tstamp": 1735034419.0118
193
  },
194
  {
195
+ "model_a": "GPT-4o + FLUX.1 [dev]",
196
  "model_b": "ChatDiT",
197
  "winner": "model_b",
198
  "judge": "arena_user_127.0.0.1",
 
200
  "tstamp": 1735034425.6972
201
  },
202
  {
203
+ "model_a": "GPT-4o + FLUX.1 [dev]",
204
  "model_b": "ChatDiT",
205
  "winner": "model_b",
206
  "judge": "arena_user_127.0.0.1",
 
209
  },
210
  {
211
  "model_a": "ChatDiT",
212
+ "model_b": "GPT-4o + FLUX.1 [dev]",
213
  "winner": "model_a",
214
  "judge": "arena_user_127.0.0.1",
215
  "anony": true,
216
  "tstamp": 1735092762.0
217
  },
218
  {
219
+ "model_a": "GPT-4o + FLUX.1 [dev]",
220
  "model_b": "ChatDiT",
221
  "winner": "tie (bothbad)",
222
  "judge": "arena_user_127.0.0.1",
 
224
  "tstamp": 1735092774.618
225
  },
226
  {
227
+ "model_a": "GPT-4o + FLUX.1 [dev]",
228
  "model_b": "ChatDiT",
229
  "winner": "model_a",
230
  "judge": "arena_user_127.0.0.1",
 
232
  "tstamp": 1735092797.2067
233
  },
234
  {
235
+ "model_a": "GPT-4o + FLUX.1 [dev]",
236
  "model_b": "ChatDiT",
237
  "winner": "model_b",
238
  "judge": "arena_user_127.0.0.1",
 
240
  "tstamp": 1735092804.6699
241
  },
242
  {
243
+ "model_a": "GPT-4o + FLUX.1 [dev]",
244
  "model_b": "ChatDiT",
245
  "winner": "model_a",
246
  "judge": "arena_user_127.0.0.1",
 
248
  "tstamp": 1735092810.2635
249
  },
250
  {
251
+ "model_a": "GPT-4o + FLUX.1 [dev]",
252
  "model_b": "ChatDiT",
253
  "winner": "model_b",
254
  "judge": "arena_user_127.0.0.1",
 
257
  },
258
  {
259
  "model_a": "ChatDiT",
260
+ "model_b": "GPT-4o + FLUX.1 [dev]",
261
  "winner": "tie (bothbad)",
262
  "judge": "arena_user_127.0.0.1",
263
  "anony": true,
 
558
  "judge": "arena_user_127.0.0.1",
559
  "anony": true,
560
  "tstamp": 1736305578.3648
561
+ },
562
+ {
563
+ "model_a": "GPT-4o + OmniGen",
564
+ "model_b": "GPT-4o + PixArt-Sigma",
565
+ "winner": "model_b",
566
+ "judge": "arena_user_10.16.39.228",
567
+ "anony": true,
568
+ "tstamp": 1736316463.4086
569
+ },
570
+ {
571
+ "model_a": "GPT-4o + OmniGen",
572
+ "model_b": "GPT-4o + PixArt-Sigma",
573
+ "winner": "model_a",
574
+ "judge": "arena_user_10.16.30.109",
575
+ "anony": true,
576
+ "tstamp": 1736316525.3474
577
+ },
578
+ {
579
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
580
+ "model_b": "GPT-4o + DALLE-3",
581
+ "winner": "tie (bothbad)",
582
+ "judge": "arena_user_10.16.9.166",
583
+ "anony": false,
584
+ "tstamp": 1736317079.2219
585
+ },
586
+ {
587
+ "model_a": "GPT-4o + Stable Diffusion 3 Medium",
588
+ "model_b": "GPT-4o + OmniGen",
589
+ "winner": "model_a",
590
+ "judge": "arena_user_10.16.39.228",
591
+ "anony": true,
592
+ "tstamp": 1736317103.5229
593
+ },
594
+ {
595
+ "model_a": "ChatDiT",
596
+ "model_b": "GPT-4o + PixArt-Sigma",
597
+ "winner": "model_b",
598
+ "judge": "arena_user_10.16.9.166",
599
+ "anony": true,
600
+ "tstamp": 1736317151.2313
601
+ },
602
+ {
603
+ "model_a": "GPT-4o + Emu2",
604
+ "model_b": "GPT-4o + PixArt-Sigma",
605
+ "winner": "model_b",
606
+ "judge": "arena_user_10.16.24.150",
607
+ "anony": true,
608
+ "tstamp": 1736317260.068
609
+ },
610
+ {
611
+ "model_a": "GPT-4o + PixArt-Sigma",
612
+ "model_b": "GPT-4o + OmniGen",
613
+ "winner": "model_b",
614
+ "judge": "arena_user_172.18.13.178",
615
+ "anony": true,
616
+ "tstamp": 1736320695.0812
617
+ },
618
+ {
619
+ "model_a": "ChatDiT",
620
+ "model_b": "GPT-4o + Stable Diffusion 3 Medium",
621
+ "winner": "model_a",
622
+ "judge": "arena_user_10.16.43.67",
623
+ "anony": true,
624
+ "tstamp": 1736321735.4094
625
+ },
626
+ {
627
+ "model_a": "GPT-4o + PixArt-Sigma",
628
+ "model_b": "GPT-4o + FLUX.1 [dev]",
629
+ "winner": "model_a",
630
+ "judge": "arena_user_10.16.24.150",
631
+ "anony": true,
632
+ "tstamp": 1736335598.6764
633
  }
634
  ]
arena_elo/results/latest/elo_results.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f156bf1709b8b009bb3fbd5eaaf2754c2784c675fd9d24822d0ee15449918600
3
- size 59971
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcf8ae6947143cd90ff0649dfa15dbfb465397de67be4e0b39ff34e0f705df3
3
+ size 57457
arena_elo/results/latest/leaderboard.csv CHANGED
@@ -1,9 +1,8 @@
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),license,creator,link
2
- GPT-4o + FLUX.1 [dev],GPT-4o + FLUX.1 [dev],1291.5350276356353,1192.7931512893806,FLUX.1 [dev] Non-Commercial License,Black Forest Labs,https://huggingface.co/black-forest-labs/FLUX.1-dev
3
- GPT-4o + Stable Diffusion 3 Medium,GPT-4o + Stable Diffusion 3 Medium,1072.5747207950458,1048.875237084943,Stability AI Community License,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
4
- ChatDiT,ChatDiT,1071.8745978223071,1129.7326464913726,MIT License,Tongyi Lab,https://github.com/ali-vilab/ChatDiT
5
- FLUX-1,FLUX-1,1018.6022943249966,1075.7881020731181,N/A,N/A,N/A
6
- GPT-4o + Emu2,GPT-4o + Emu2,973.8076810129663,983.3170687066316,Apache License 2.0,BAAI,https://huggingface.co/BAAI/Emu2
7
- GPT-4o + PixArt-Sigma,GPT-4o + PixArt-Sigma,972.4179488470709,945.3527099410124,CreativeML Open RAIL++-M License,Huawei Noah's Ark Lab,https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
8
- GPT-4o + DALLE-3,GPT-4o + DALLE-3,871.6193286007302,907.0475236573719,OpenAI Terms of Use,OpenAI,https://openai.com/index/dall-e-3/
9
- GPT-4o + OmniGen,GPT-4o + OmniGen,727.5684009612476,717.0935607561696,MIT License,BAAI,https://huggingface.co/spaces/Shitao/OmniGen
 
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),license,creator,link
2
+ ChatDiT,ChatDiT,1141.9082940955832,1141.3534939692347,MIT License,Tongyi Lab,https://github.com/ali-vilab/ChatDiT
3
+ GPT-4o + FLUX.1 [dev],GPT-4o + FLUX.1 [dev],1106.4413262289752,1096.9228824729828,FLUX.1 [dev] Non-Commercial License,Black Forest Labs,https://huggingface.co/black-forest-labs/FLUX.1-dev
4
+ GPT-4o + Stable Diffusion 3 Medium,GPT-4o + Stable Diffusion 3 Medium,1037.4899722855018,1026.2775791477911,Stability AI Community License,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
5
+ GPT-4o + PixArt-Sigma,GPT-4o + PixArt-Sigma,1030.2322549298108,1003.2938185806802,CreativeML Open RAIL++-M License,Huawei Noah's Ark Lab,https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
6
+ GPT-4o + Emu2,GPT-4o + Emu2,941.1199462147184,946.5946854676025,Apache License 2.0,BAAI,https://huggingface.co/BAAI/Emu2
7
+ GPT-4o + DALLE-3,GPT-4o + DALLE-3,884.7999841743839,941.2186221773824,OpenAI Terms of Use,OpenAI,https://openai.com/index/dall-e-3/
8
+ GPT-4o + OmniGen,GPT-4o + OmniGen,858.0082220710267,844.3389181843263,MIT License,BAAI,https://huggingface.co/spaces/Shitao/OmniGen