tfrere's picture
add indic to indo-european group
e4f2993
raw
history blame
4.81 kB
import { useMemo, useRef } from "react";
// Lookup table mapping language-family names to the lowercase language names
// that belong to them.
//
// Two shapes coexist:
//   - "Indo-European" maps sub-family names (Germanic, Romance, ...) to arrays
//     of languages;
//   - every other family maps directly to a flat array of languages.
//
// findLanguageFamily walks the entries in declaration order and returns the
// first match, so the key order below is significant.
const LANGUAGE_FAMILIES = {
"Indo-European": {
Germanic: ["english", "german", "dutch", "danish", "swedish", "icelandic"],
Romance: [
"french",
"spanish",
"italian",
"portuguese",
"romanian",
"catalan",
"galician",
],
Slavic: [
"russian",
"polish",
"czech",
"slovak",
"ukrainian",
"bulgarian",
"slovenian",
"serbian",
"croatian",
],
Baltic: ["lithuanian", "latvian"],
"Indo-Iranian": [
"persian",
"hindi",
"bengali",
"gujarati",
"nepali",
"marathi",
"indic",
],
Greek: ["greek"],
Armenian: ["armenian"],
},
"Sino-Tibetan": ["chinese", "mandarin", "taiwanese"],
Afroasiatic: [
"arabic",
"hebrew",
"darija", // Variety of Arabic
],
Austronesian: [
"indonesian",
"malay",
"filipino",
"singlish", // English-based creole, but with Malay influence
],
"Niger-Congo": ["swahili", "yoruba"],
Dravidian: ["tamil", "telugu", "kannada", "malayalam"],
Austroasiatic: ["vietnamese"],
"Kra-Dai": ["thai"],
Japonic: [
"japanese",
"日本語", // "Japanese" written in Japanese script
],
Koreanic: ["korean"],
Uralic: ["hungarian", "finnish", "estonian"],
Turkic: ["turkish", "azerbaijani", "uzbek", "kazakh"],
"Language Isolate": [
"basque", // Language isolate, no known family
],
"Other Languages": [], // Will catch any language not in other families
};
// Order in which language families are ranked. useLanguageStats sorts
// languages by the index of their family in this array, then alphabetically
// by language name within a family.
const FAMILY_ORDER = [
"Indo-European", // Largest family in Europe and South Asia
"Sino-Tibetan", // Second-largest family in Asia
"Afroasiatic", // Main family in North Africa and the Middle East
"Austronesian", // Main family in insular Southeast Asia
"Niger-Congo", // Largest family in sub-Saharan Africa
"Dravidian", // Main family in South India
"Austroasiatic", // Important in mainland Southeast Asia
"Kra-Dai", // Family of Thai
"Japonic", // Japanese
"Koreanic", // Korean
"Uralic", // Finno-Ugric family
"Turkic", // Turkic language family
"Language Isolate", // Language isolates
"Other Languages", // Other, unclassified languages
];
/**
 * Looks up the family (and sub-family, when the family has any) of a language.
 *
 * The lookup is case-insensitive and walks LANGUAGE_FAMILIES in declaration
 * order, returning the first match.
 *
 * @param {string} language - Language name, in any casing.
 * @returns {{ family: string, subFamily: string }}
 *   - For a family split into sub-families: the family key and the matching
 *     sub-family key.
 *   - For a flat family: the family key, with `subFamily` set to `language`
 *     exactly as it was passed in.
 *   - When nothing matches (or `language` is not a string):
 *     `{ family: "Other Languages", subFamily: language }`.
 */
const findLanguageFamily = (language) => {
  const fallback = { family: "Other Languages", subFamily: language };

  // A non-string cannot match any entry; fall back instead of throwing on
  // `.toLowerCase()`.
  if (typeof language !== "string") {
    return fallback;
  }

  // Normalise once rather than on every comparison.
  const needle = language.toLowerCase();

  for (const [family, content] of Object.entries(LANGUAGE_FAMILIES)) {
    if (Array.isArray(content)) {
      // Flat family: the languages are listed directly.
      if (content.includes(needle)) {
        return { family, subFamily: language };
      }
      continue;
    }

    // Nested family: detected by shape rather than by the "Indo-European"
    // name, so any family that gains sub-families keeps working.
    for (const [subFamily, languages] of Object.entries(content)) {
      if (languages.includes(needle)) {
        return { family, subFamily };
      }
    }
  }

  return fallback;
};
// Prefix (compared case-insensitively) that marks a tag as a language tag.
const LANGUAGE_TAG_PREFIX = "language:";

/**
 * Extracts the language name from a `language:<name>` tag.
 *
 * @param {*} tag - A leaderboard tag; only strings can be language tags.
 * @returns {string|null} The lowercased text between the first and second
 *   colon, or null when `tag` is not a language tag.
 */
const parseLanguageTag = (tag) => {
  if (typeof tag !== "string") {
    return null;
  }
  if (!tag.toLowerCase().startsWith(LANGUAGE_TAG_PREFIX)) {
    return null;
  }
  return tag.split(":")[1].toLowerCase();
};

/**
 * Collects the distinct languages tagged on the given boards, sorted by
 * family rank (position in FAMILY_ORDER) and then alphabetically.
 *
 * @param {Array<{tags?: Array}>} boards
 * @returns {string[]} Sorted, de-duplicated, lowercased language names.
 */
const collectSortedLanguages = (boards) => {
  const seen = new Set();
  boards.forEach((board) => {
    board?.tags?.forEach((tag) => {
      const language = parseLanguageTag(tag);
      if (language !== null) {
        seen.add(language);
      }
    });
  });

  // Resolve each language's family rank once, not on every comparison.
  const ranked = Array.from(seen, (language) => ({
    language,
    rank: FAMILY_ORDER.indexOf(findLanguageFamily(language).family),
  }));

  ranked.sort(
    (a, b) => a.rank - b.rank || a.language.localeCompare(b.language)
  );

  return ranked.map(({ language }) => language);
};

/**
 * Counts, for every known language, how many boards carry its language tag.
 * A board counts at most once per language even if the tag is repeated.
 *
 * @param {string[]} languages - Languages to report, in output order.
 * @param {Array<{tags?: Array}>} boards - Boards to count over.
 * @returns {Map<string, number>} language -> number of boards, with entries
 *   in the same order as `languages` (zero for languages with no board).
 */
const countBoardsPerLanguage = (languages, boards) => {
  const counts = new Map(languages.map((language) => [language, 0]));

  // Single pass over the boards instead of rescanning them per language.
  boards.forEach((board) => {
    const matched = new Set();
    board?.tags?.forEach((tag) => {
      // Same parser as the language list, so every listed language can
      // actually be matched here.
      const language = parseLanguageTag(tag);
      if (language !== null && counts.has(language)) {
        matched.add(language);
      }
    });
    matched.forEach((language) => {
      counts.set(language, counts.get(language) + 1);
    });
  });

  return counts;
};

/**
 * Hook exposing the languages found in `leaderboards` and, for each of them,
 * the number of boards in `filteredLeaderboards` tagged with it.
 *
 * The values are recomputed on every render in which both arguments are
 * truthy. Otherwise the previously computed values held in the refs are
 * returned (null before the first computation).
 *
 * @param {Array<{tags?: Array}>|null|undefined} leaderboards - All boards;
 *   defines the language list.
 * @param {Array<{tags?: Array}>|null|undefined} filteredLeaderboards - Boards
 *   the per-language counts are computed over.
 * @returns {{
 *   languages: string[]|null,
 *   languageStats: Map<string, number>|null,
 *   LANGUAGE_FAMILIES: object,
 *   findLanguageFamily: Function
 * }}
 */
export const useLanguageStats = (leaderboards, filteredLeaderboards) => {
  const statsRef = useRef(null);
  const languagesRef = useRef(null);

  if (leaderboards && filteredLeaderboards) {
    const languages = collectSortedLanguages(leaderboards);
    languagesRef.current = languages;
    statsRef.current = countBoardsPerLanguage(languages, filteredLeaderboards);
  }

  return {
    languages: languagesRef.current,
    languageStats: statsRef.current,
    LANGUAGE_FAMILIES,
    findLanguageFamily,
  };
};