# Napolab Leaderboard Data Configuration
# This file contains all datasets and benchmark results for the Gradio app
#
# Data Source: "Lessons learned from the evaluation of Portuguese language models"
# by Ruan Chaves Rodrigues (2023) - Master's dissertation, University of Malta
# Available at: https://www.um.edu.mt/library/oar/handle/123456789/120557
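#
# Usage (sketch): the app is expected to read this file with PyYAML into nested
# dicts. The filename "data.yaml" and the variable names below are assumptions
# for illustration, not part of this config.
#
#   import yaml
#
#   with open("data.yaml", encoding="utf-8") as f:  # assumed filename
#       config = yaml.safe_load(f)                  # top-level keys: sources, datasets, ...
#
#   # e.g. every ASSIN 2 RTE score, keyed by model id:
#   for model, scores in config["benchmark_results"]["assin2_rte"].items():
#       print(model, scores["accuracy"])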
# Data Sources
sources:
  napolab_thesis:
    name: "Napolab Thesis"
    description: "Lessons learned from the evaluation of Portuguese language models"
    author: "Ruan Chaves Rodrigues"
    year: 2023
    url: "https://www.um.edu.mt/library/oar/handle/123456789/120557"
    institution: "University of Malta"
  open_pt_llm_leaderboard:
    name: "Open PT LLM Leaderboard"
    description: "Large Language Models on Portuguese Benchmarks"
    author: "Eduardo Garcia"
    year: 2025
    url: "https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard"
    platform: "Hugging Face Spaces"
  teenytinyllama_paper:
    name: "TeenyTinyLlama Paper"
    description: "TeenyTinyLlama: Open-source tiny language models trained in Brazilian Portuguese"
    authors: ["Corrêa, Nicholas Kluge", "Falk, Sophia", "Fatimah, Shiza", "Sen, Aniket", "De Oliveira, Nythamar"]
    year: 2024
    journal: "Machine Learning with Applications"
    doi: "10.1016/j.mlwa.2024.100558"
# Dataset Information
datasets:
  assin_rte:
    name: "ASSIN RTE"
    description: "Avaliação de Similaridade Semântica e Inferência Textual (Semantic Similarity and Textual Entailment Evaluation) - RTE"
    tasks: ["RTE"]
    url: "https://huggingface.co/datasets/assin"
  assin_sts:
    name: "ASSIN STS"
    description: "Avaliação de Similaridade Semântica e Inferência Textual (Semantic Similarity and Textual Entailment Evaluation) - STS"
    tasks: ["STS"]
    url: "https://huggingface.co/datasets/assin"
  assin2_rte:
    name: "ASSIN 2 RTE"
    description: "Avaliação de Similaridade Semântica e Inferência Textual (v2) - RTE"
    tasks: ["RTE"]
    url: "https://huggingface.co/datasets/assin2"
  assin2_sts:
    name: "ASSIN 2 STS"
    description: "Avaliação de Similaridade Semântica e Inferência Textual (v2) - STS"
    tasks: ["STS"]
    url: "https://huggingface.co/datasets/assin2"
  faquad-nli:
    name: "FaQuAD-NLI"
    description: "Textual entailment over question-answer pairs derived from the FaQuAD reading-comprehension dataset"
    tasks: ["NLI"]
    url: "https://huggingface.co/datasets/ruanchaves/faquad-nli"
  hatebr:
    name: "HateBR"
    description: "Hate speech and offensive language detection in Brazilian Portuguese (Instagram comments)"
    tasks: ["Classification"]
    url: "https://huggingface.co/datasets/ruanchaves/hatebr"
  porsimplessent:
    name: "PorSimplesSent"
    description: "Sentence simplification level prediction, derived from the PorSimples corpus"
    tasks: ["Classification"]
    url: "https://huggingface.co/datasets/ruanchaves/porsimplessent"
  reli-sa:
    name: "ReLi-SA"
    description: "Sentiment analysis of book reviews from the ReLi corpus (REsenhas de LIvros)"
    tasks: ["Sentiment Analysis"]
    url: "https://huggingface.co/datasets/ruanchaves/reli-sa"
# Benchmark Results
# Scores are copied verbatim from the sources listed above. For the STS
# datasets, the values stored under `accuracy` are correlation scores
# (Pearson, as conventionally reported for ASSIN STS) rather than
# classification accuracy; the key name is kept uniform so the app can read
# every dataset the same way.
benchmark_results:
  assin_rte:
    albertina-pt-pt:
      accuracy: 0.887
    albertina-pt-br:
      accuracy: 0.844
    deberta-v2-large:
      accuracy: 0.864
    xlm-roberta-large:
      accuracy: 0.874
    mdeberta-v3-base:
      accuracy: 0.863
    bertimbau-large:
      accuracy: 0.838
    bert-large:
      accuracy: 0.802
    bertimbau-base:
      accuracy: 0.828
    bert-multilingual-base:
      accuracy: 0.815
    xlm-roberta-base:
      accuracy: 0.822
    bertinho:
      accuracy: 0.786
    ixaes:
      accuracy: 0.782
  assin_sts:
    albertina-pt-pt:
      accuracy: 0.874
    albertina-pt-br:
      accuracy: 0.883
    deberta-v2-large:
      accuracy: 0.861
    xlm-roberta-large:
      accuracy: 0.863
    mdeberta-v3-base:
      accuracy: 0.855
    bertimbau-large:
      accuracy: 0.826
    bert-large:
      accuracy: 0.822
    bertimbau-base:
      accuracy: 0.844
    bert-multilingual-base:
      accuracy: 0.820
    xlm-roberta-base:
      accuracy: 0.812
    bertinho:
      accuracy: 0.791
    ixaes:
      accuracy: 0.817
  assin2_rte:
    albertina-pt-pt:
      accuracy: 0.910
    albertina-pt-br:
      accuracy: 0.916
    deberta-v2-large:
      accuracy: 0.911
    xlm-roberta-large:
      accuracy: 0.910
    mdeberta-v3-base:
      accuracy: 0.904
    bertimbau-large:
      accuracy: 0.897
    bert-large:
      accuracy: 0.892
    bertimbau-base:
      accuracy: 0.884
    bert-multilingual-base:
      accuracy: 0.877
    xlm-roberta-base:
      accuracy: 0.875
    bertinho:
      accuracy: 0.855
    ixaes:
      accuracy: 0.879
    ttl-460m:
      accuracy: 0.8643
    ttl-160m:
      accuracy: 0.8578
  assin2_sts:
    deberta-v2-large:
      accuracy: 0.724
    mdeberta-v3-base:
      accuracy: 0.847
    bertimbau-large:
      accuracy: 0.855
    bert-large:
      accuracy: 0.792
    bertimbau-base:
      accuracy: 0.840
    bert-multilingual-base:
      accuracy: 0.827
    xlm-roberta-base:
      accuracy: 0.847
    bertinho:
      accuracy: 0.802
    ixaes:
      accuracy: 0.822
  faquad-nli:
    mdeberta-v3-base:
      accuracy: 0.889
    bertimbau-large:
      accuracy: 0.900
    bert-large:
      accuracy: 0.838
    bertimbau-base:
      accuracy: 0.897
    bert-multilingual-base:
      accuracy: 0.865
    xlm-roberta-base:
      accuracy: 0.898
    bertinho:
      accuracy: 0.866
    ixaes:
      accuracy: 0.860
    ttl-460m:
      accuracy: 0.9118
    ttl-160m:
      accuracy: 0.9000
  hatebr:
    mdeberta-v3-base:
      accuracy: 0.911
    bertimbau-large:
      accuracy: 0.919
    bert-large:
      accuracy: 0.838
    bertimbau-base:
      accuracy: 0.920
    bert-multilingual-base:
      accuracy: 0.871
    xlm-roberta-base:
      accuracy: 0.920
    bertinho:
      accuracy: 0.879
    ixaes:
      accuracy: 0.872
    ttl-460m:
      accuracy: 0.9228
    ttl-160m:
      accuracy: 0.9071
  porsimplessent:
    mdeberta-v3-base:
      accuracy: 0.953
    bertimbau-large:
      accuracy: 0.919
    bert-large:
      accuracy: 0.907
    bertimbau-base:
      accuracy: 0.920
    bert-multilingual-base:
      accuracy: 0.933
    xlm-roberta-base:
      accuracy: 0.920
    bertinho:
      accuracy: 0.900
    ixaes:
      accuracy: 0.899
  reli-sa:
    mdeberta-v3-base:
      accuracy: 0.719
    bertimbau-large:
      accuracy: 0.745
    bert-large:
      accuracy: 0.629
    bertimbau-base:
      accuracy: 0.713
    bert-multilingual-base:
      accuracy: 0.642
    xlm-roberta-base:
      accuracy: 0.680
    bertinho:
      accuracy: 0.681
    ixaes:
      accuracy: 0.637
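
# Ranking sketch: how a leaderboard view could be derived from the mapping
# above. Everything here (variable names, the `config` dict from the loading
# sketch at the top of the file) is illustrative, not part of this config.
#
#   results = config["benchmark_results"]
#   for dataset, models in results.items():
#       best = max(models, key=lambda m: models[m]["accuracy"])
#       print(f"{dataset}: {best} ({models[best]['accuracy']:.3f})")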
# Model Metadata
model_metadata:
  albertina-pt-pt:
    parameters: 900000000
    architecture: "Albertina PT:PT"
    base_model: "PORTULAN/albertina-ptpt"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/PORTULAN/albertina-ptpt"
    source: "napolab_thesis"
  albertina-pt-br:
    parameters: 900000000
    architecture: "Albertina PT:BR"
    base_model: "PORTULAN/albertina-ptbr"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/PORTULAN/albertina-ptbr"
    source: "napolab_thesis"
  deberta-v2-large:
    parameters: 900000000
    architecture: "DeBERTa v2 (large)"
    base_model: "microsoft/deberta-v2-large"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/microsoft/deberta-v2-large"
    source: "napolab_thesis"
  xlm-roberta-large:
    parameters: 550000000
    architecture: "XLM-RoBERTa (large)"
    base_model: "xlm-roberta-large"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/xlm-roberta-large"
    source: "napolab_thesis"
  mdeberta-v3-base:
    parameters: 86000000
    architecture: "mDeBERTa v3 (base)"
    base_model: "microsoft/mdeberta-v3-base"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/microsoft/mdeberta-v3-base"
    source: "napolab_thesis"
  bertimbau-large:
    parameters: 355000000
    architecture: "BERTimbau (large)"
    base_model: "neuralmind/bert-large-portuguese-cased"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/neuralmind/bert-large-portuguese-cased"
    source: "napolab_thesis"
  bert-large:
    parameters: 355000000
    architecture: "BERT (large)"
    base_model: "bert-large-uncased"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/bert-large-uncased"
    source: "napolab_thesis"
  bertimbau-base:
    parameters: 110000000
    architecture: "BERTimbau (base)"
    base_model: "neuralmind/bert-base-portuguese-cased"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/neuralmind/bert-base-portuguese-cased"
    source: "napolab_thesis"
  bert-multilingual-base:
    parameters: 110000000
    architecture: "BERT multilingual (base)"
    base_model: "bert-base-multilingual-cased"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/bert-base-multilingual-cased"
    source: "napolab_thesis"
  xlm-roberta-base:
    parameters: 270000000
    architecture: "XLM-RoBERTa (base)"
    base_model: "xlm-roberta-base"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/xlm-roberta-base"
    source: "napolab_thesis"
  bertinho:
    parameters: 110000000
    architecture: "Bertinho"
    base_model: "ricardo-filho/bertinho-portuguese-cased-nli-assin-2"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/ricardo-filho/bertinho-portuguese-cased-nli-assin-2"
    source: "napolab_thesis"
  ixaes:
    parameters: 110000000
    architecture: "IXAes"
    base_model: "ixa-ehu/ixambert-base-cased"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/ixa-ehu/ixambert-base-cased"
    source: "napolab_thesis"
  ttl-460m:
    parameters: 460000000
    architecture: "TeenyTinyLlama (460M)"
    base_model: "nicholasKluge/TeenyTinyLlama-460m"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/nicholasKluge/TeenyTinyLlama-460m"
    source: "teenytinyllama_paper"
  ttl-160m:
    parameters: 160000000
    architecture: "TeenyTinyLlama (160M)"
    base_model: "nicholasKluge/TeenyTinyLlama-160m"
    task: "Multiple"
    huggingface_url: "https://huggingface.co/nicholasKluge/TeenyTinyLlama-160m"
    source: "teenytinyllama_paper"
# Additional Models (for Model Hub tab)
additional_models:
  albertina_models:
    albertina-pt-pt:
      huggingface_url: "https://huggingface.co/PORTULAN/albertina-ptpt"
    albertina-pt-br:
      huggingface_url: "https://huggingface.co/PORTULAN/albertina-ptbr"
  deberta_models:
    deberta-v2-large:
      huggingface_url: "https://huggingface.co/microsoft/deberta-v2-large"
    mdeberta-v3-base:
      huggingface_url: "https://huggingface.co/microsoft/mdeberta-v3-base"
  roberta_models:
    xlm-roberta-large:
      huggingface_url: "https://huggingface.co/xlm-roberta-large"
    xlm-roberta-base:
      huggingface_url: "https://huggingface.co/xlm-roberta-base"
  bert_models:
    bertimbau-large:
      huggingface_url: "https://huggingface.co/neuralmind/bert-large-portuguese-cased"
    bertimbau-base:
      huggingface_url: "https://huggingface.co/neuralmind/bert-base-portuguese-cased"
    bert-large:
      huggingface_url: "https://huggingface.co/bert-large-uncased"
    bert-multilingual-base:
      huggingface_url: "https://huggingface.co/bert-base-multilingual-cased"
  specialized_models:
    bertinho:
      huggingface_url: "https://huggingface.co/ricardo-filho/bertinho-portuguese-cased-nli-assin-2"
    ixaes:
      huggingface_url: "https://huggingface.co/ixa-ehu/ixambert-base-cased"
  teenytinyllama_models:
    ttl-460m:
      huggingface_url: "https://huggingface.co/nicholasKluge/TeenyTinyLlama-460m"
    ttl-160m:
      huggingface_url: "https://huggingface.co/nicholasKluge/TeenyTinyLlama-160m"
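
# Rendering sketch for the Model Hub tab: one Markdown link list per model
# family. `gr.Markdown` is the standard Gradio component; the exact layout is
# an assumption about how the app consumes this section.
#
#   import gradio as gr
#
#   lines = []
#   for family, models in config["additional_models"].items():
#       lines.append(f"### {family.replace('_', ' ').title()}")
#       for model, info in models.items():
#           lines.append(f"- [{model}]({info['huggingface_url']})")
#   hub_tab = gr.Markdown("\n".join(lines))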