Spaces:
Running
Running
David Pomerenke
committed on
Commit
·
d178010
1
Parent(s):
160ce91
Improve language chart
Browse files
- index.html +27 -5
- languagebench.py +1 -1
- results.json +40 -0
- results_summary.json +5 -0
index.html
CHANGED
|
@@ -22,6 +22,8 @@
|
|
| 22 |
margin: 0;
|
| 23 |
}
|
| 24 |
</style>
|
|
|
|
|
|
|
| 25 |
</head>
|
| 26 |
|
| 27 |
<body>
|
|
@@ -37,24 +39,44 @@
|
|
| 37 |
|
| 38 |
const summary = await fetch('results_summary.json');
|
| 39 |
const summaryData = await summary.json();
|
|
|
|
| 40 |
|
| 41 |
// Create summary plot
|
| 42 |
const summaryPlot = Plot.plot({
|
| 43 |
width: 800,
|
| 44 |
height: 400,
|
| 45 |
-
|
|
|
|
| 46 |
y: { label: "BLEU Score (average across models)" },
|
| 47 |
marks: [
|
|
|
|
| 48 |
Plot.rectY(summaryData, Plot.stackX({
|
| 49 |
x: "speakers",
|
| 50 |
order: "bleu",
|
| 51 |
reverse: true,
|
| 52 |
y2: "bleu", // y2 to avoid stacking by y
|
| 53 |
-
title:
|
| 54 |
-
|
| 55 |
-
insetRight: 0.2
|
| 56 |
})),
|
| 57 |
-
Plot.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
]
|
| 59 |
});
|
| 60 |
|
|
|
|
| 22 |
margin: 0;
|
| 23 |
}
|
| 24 |
</style>
|
| 25 |
+
<link rel="icon"
|
| 26 |
+
href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>🌍</text></svg>">
|
| 27 |
</head>
|
| 28 |
|
| 29 |
<body>
|
|
|
|
| 39 |
|
| 40 |
const summary = await fetch('results_summary.json');
|
| 41 |
const summaryData = await summary.json();
|
| 42 |
+
// Builds the multi-line tooltip text for one language entry: the language
// name, the speaker count in millions (truncated to one decimal place), and
// the BLEU score rounded to one decimal place.
// Math.trunc is used instead of parseInt: parseInt stringifies its argument
// first, so a tiny quotient such as 5e-7 would be parsed as 5 rather than 0.
const formatTitle = (d) => {
  const millions = Math.trunc(d.speakers / 100_000) / 10;
  return `${d.target_language_name}\n${millions}M speakers\nBLEU score: ${d.bleu.toFixed(1)}`;
};
|
| 43 |
|
| 44 |
// Create summary plot
|
| 45 |
const summaryPlot = Plot.plot({
|
| 46 |
width: 800,
|
| 47 |
height: 400,
|
| 48 |
+
marginBottom: 100,
|
| 49 |
+
x: { label: "Number of speakers", axis: null },
|
| 50 |
y: { label: "BLEU Score (average across models)" },
|
| 51 |
marks: [
|
| 52 |
+
|
| 53 |
Plot.rectY(summaryData, Plot.stackX({
|
| 54 |
x: "speakers",
|
| 55 |
order: "bleu",
|
| 56 |
reverse: true,
|
| 57 |
y2: "bleu", // y2 to avoid stacking by y
|
| 58 |
+
title: formatTitle,
|
| 59 |
+
tip: true,
|
|
|
|
| 60 |
})),
|
| 61 |
+
Plot.rectY(summaryData, Plot.pointerX(Plot.stackX({
|
| 62 |
+
x: "speakers",
|
| 63 |
+
order: "bleu",
|
| 64 |
+
reverse: true,
|
| 65 |
+
y2: "bleu", // y2 to avoid stacking by y
|
| 66 |
+
fill: "grey",
|
| 67 |
+
}))),
|
| 68 |
+
Plot.text(summaryData, Plot.stackX({
|
| 69 |
+
filter: (d) => d.speakers > 1_000_000,
|
| 70 |
+
x: "speakers",
|
| 71 |
+
y2: "bleu",
|
| 72 |
+
order: "bleu",
|
| 73 |
+
reverse: true,
|
| 74 |
+
text: "target_language_name",
|
| 75 |
+
frameAnchor: "bottom",
|
| 76 |
+
textAnchor: "end",
|
| 77 |
+
dy: 10,
|
| 78 |
+
rotate: 270
|
| 79 |
+
}))
|
| 80 |
]
|
| 81 |
});
|
| 82 |
|
languagebench.py
CHANGED
|
@@ -25,7 +25,7 @@ original_language = "eng_Latn"
|
|
| 25 |
dataset = "floresp-v2.0-rc.3/dev"
|
| 26 |
random.seed(42)
|
| 27 |
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
| 28 |
-
target_languages = random.choices(target_languages, k=
|
| 29 |
# target_languages = [
|
| 30 |
# "eng_Latn",
|
| 31 |
# "deu_Latn",
|
|
|
|
| 25 |
dataset = "floresp-v2.0-rc.3/dev"
|
| 26 |
random.seed(42)
|
| 27 |
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
| 28 |
+
# Pick 9 target languages from the dataset listing, using the RNG seeded above.
# NOTE(review): random.choices samples WITH replacement, so the same language
# can be selected more than once; confirm whether random.sample was intended.
target_languages = random.choices(target_languages, k=9)
|
| 29 |
# target_languages = [
|
| 30 |
# "eng_Latn",
|
| 31 |
# "deu_Latn",
|
results.json
CHANGED
|
@@ -318,5 +318,45 @@
|
|
| 318 |
"target_language_name": "Czech",
|
| 319 |
"speakers": 10700000,
|
| 320 |
"bleu": 60.25088578142904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
}
|
| 322 |
]
|
|
|
|
| 318 |
"target_language_name": "Czech",
|
| 319 |
"speakers": 10700000,
|
| 320 |
"bleu": 60.25088578142904
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"model": "openai/gpt-4o-mini",
|
| 324 |
+
"original_language": "eng_Latn",
|
| 325 |
+
"target_language": "sag_Latn",
|
| 326 |
+
"target_language_name": "Sango",
|
| 327 |
+
"speakers": 4600000,
|
| 328 |
+
"bleu": 2.2745290486034833
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"model": "google/gemini-flash-1.5",
|
| 332 |
+
"original_language": "eng_Latn",
|
| 333 |
+
"target_language": "sag_Latn",
|
| 334 |
+
"target_language_name": "Sango",
|
| 335 |
+
"speakers": 4600000,
|
| 336 |
+
"bleu": 5.131617554505083
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"model": "anthropic/claude-3.5-sonnet",
|
| 340 |
+
"original_language": "eng_Latn",
|
| 341 |
+
"target_language": "sag_Latn",
|
| 342 |
+
"target_language_name": "Sango",
|
| 343 |
+
"speakers": 4600000,
|
| 344 |
+
"bleu": 22.265544703760973
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"model": "qwen/qwen-2.5-72b-instruct",
|
| 348 |
+
"original_language": "eng_Latn",
|
| 349 |
+
"target_language": "sag_Latn",
|
| 350 |
+
"target_language_name": "Sango",
|
| 351 |
+
"speakers": 4600000,
|
| 352 |
+
"bleu": 1.1524444505654738
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"model": "meta-llama/llama-3.1-8b-instruct",
|
| 356 |
+
"original_language": "eng_Latn",
|
| 357 |
+
"target_language": "sag_Latn",
|
| 358 |
+
"target_language_name": "Sango",
|
| 359 |
+
"speakers": 4600000,
|
| 360 |
+
"bleu": 1.437953401517244
|
| 361 |
}
|
| 362 |
]
|
results_summary.json
CHANGED
|
@@ -38,5 +38,10 @@
|
|
| 38 |
"target_language_name":"Polish",
|
| 39 |
"bleu":59.3540779188,
|
| 40 |
"speakers":40200000.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
]
|
|
|
|
| 38 |
"target_language_name":"Polish",
|
| 39 |
"bleu":59.3540779188,
|
| 40 |
"speakers":40200000.0
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"target_language_name":"Sango",
|
| 44 |
+
"bleu":6.4524178318,
|
| 45 |
+
"speakers":4600000.0
|
| 46 |
}
|
| 47 |
]
|