# src/about.py

from enum import Enum
from dataclasses import dataclass

# TaskInfo describes one evaluated task: the benchmark's internal name,
# the column header shown on the leaderboard, and the metric reported.
@dataclass
class TaskInfo:
    benchmark: str   # internal subject identifier, e.g. 'professional_law'
    col_name: str    # human-readable column header, e.g. 'Professional Law'
    metric: str      # reported metric, e.g. 'accuracy'

# Tasks enum listing the MMMLU subjects reported on the leaderboard
# (a subset of the full benchmark; some subjects are intentionally excluded)
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')
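
# A minimal helper sketch (an assumption; app.py may implement its own
# aggregation): average a result row's per-subject accuracies using the
# display columns declared in the Tasks enum.
def average_accuracy(results: dict) -> float:
    cols = [task.value.col_name for task in Tasks]
    scores = [results[c] for c in cols if c in results]
    return sum(scores) / len(scores) if scores else 0.0

# Usage: average_accuracy({'Professional Law': 55.2, 'Philosophy': 61.0}) -> 58.1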


# Constants consumed by app.py to render the leaderboard UI

TITLE = """
<div align="center">
    <a href="https://imgbb.com/">
        <img src="https://i.ibb.co/zHzhXqb/Smile-Group-Logo.png" alt="Smile-Group-Logo" border="0" width="800" height="auto">
    </a>
</div>
"""



INTRODUCTION_TEXT = """
<div style="background-color:#001f3f; padding: 20px; border-radius: 10px;">
    <h1 style="color:#ffffff; font-family: Arial, sans-serif; text-align: center;">
        Welcome to <span style="color:#f39c12;">BASMA</span>: Benchmark for Arabic System in Multitask Assessment
    </h1>
    <p style="color:#d4d4d4; font-family: 'Verdana', sans-serif; font-size: 18px; text-align: center;">
        This leaderboard tracks the performance of Arabic large language models on the
        <strong>newly released MMMLU dataset from OpenAI</strong> across a wide range of subjects.
    </p>
</div>
"""


LLM_BENCHMARKS_TEXT = """
## About BASMA

BASMA is built on the Multilingual Massive Multitask Language Understanding (MMMLU) benchmark, which is designed to evaluate Arabic models on a wide range of subjects.

## How to Interpret the Leaderboard

- **Model**: The name of the model evaluated.
- **Average ⬆️**: The average accuracy across all subjects.
- **Subject Columns**: The accuracy (%) for each individual subject.

## How to Submit Your Model

Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.
"""

EVALUATION_QUEUE_TEXT = """
Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
"""

CITATION_BUTTON_LABEL = "Citation"
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:

@misc{BASMA,
  author = {Nacar, Omer},
  title = {BASMA: Benchmark for Arabic System in Multitask Assessment},
  year = {2024},
  publisher = {Omartificial-Intelligence-Space}
}
"""