import os
import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
from huggingface_hub import HfApi
from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
from dotenv import load_dotenv

load_dotenv()

SERVER_URL = os.getenv("SERVER_URL")
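# Example .env entry (illustrative only; the real leaderboard endpoint is
# deployment-specific and not hard-coded here):
#
#   SERVER_URL=https://example.com/api/leaderboard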
					
						
def get_data():
    """Fetch leaderboard entries from the backend and return the parsed JSON."""
    response = requests.get(SERVER_URL, timeout=30)
    response.raise_for_status()
    data = response.json()
    return data
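# Expected response shape, inferred from the parsing in main() (not a published
# schema; only "name", "language" and "result" are read):
#
#   [
#     {
#       "name": "<model id>",
#       "language": "<language>",
#       "result": {"all": {"acc_norm": 0.42}, "ARC-Easy": {"acc_norm": 0.51}, ...}
#     },
#     ...
#   ]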
					
						
def main():

    st.set_page_config(page_title="Indic LLM Leaderboard", layout="wide")

    title_column, refresh_column = st.columns([0.92, 0.08])
    with title_column:
        st.title("Indic LLM Leaderboard (α)")
        st.markdown("The Indic Eval Leaderboard utilizes the [indic_eval](https://github.com/adithya-s-k/indic_eval) evaluation framework, incorporating SOTA translated benchmarks such as ARC, Hellaswag, and MMLU. Supporting 7 Indic languages, it offers a comprehensive platform for assessing model performance and comparing results within the Indic language modeling landscape.")
    with refresh_column:
        if st.button("Refresh", type="primary"):
            # Streamlit reruns the script on every interaction, so clicking the
            # button simply triggers a fresh fetch before the tabs are rebuilt.
            data = get_data()

    Leaderboard_tab, About_tab, FAQ_tab, Submit_tab = st.tabs(["Leaderboard", "About", "FAQ", "Submit"])

    with Leaderboard_tab:
        data = get_data()

        table_data = []
        all_models = []

        def get_score(result, benchmark):
            """Return the acc_norm score for a benchmark, or None when it is absent."""
            try:
                return result[benchmark]["acc_norm"]
            except (KeyError, TypeError):
                return None

        for item in data:
            model_name = item.get("name")
            language = item.get("language")
            result = item.get("result", {})

            all_models.append(model_name)
            table_data.append({
                "Model Name": model_name,
                "Language": language,
                "Average": get_score(result, "all"),
                "ARC-Easy": get_score(result, "ARC-Easy"),
                "ARC-Challenge": get_score(result, "ARC-Challenge"),
                "Hellaswag": get_score(result, "Hellaswag"),
                "Boolq": get_score(result, "Boolq"),
                "MMLU": get_score(result, "MMLU"),
                "Winogrande": get_score(result, "Winograde"),  # the result payload uses this key spelling
                "Translation": get_score(result, "Translation"),
                "Generation": get_score(result, "Generation"),
            })

        df = pd.DataFrame(table_data)

        title = st.text_input('Model Name', placeholder="Search for your model (separate multiple queries with `;`) and press ENTER...")

        col1, col2 = st.columns(2)
        with col1:
            benchmark_options = st.multiselect(
                'Pick Benchmark',
                ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq', 'MMLU', 'Winogrande', 'Translation', 'Generation'],
                ['ARC-Easy', 'ARC-Challenge', 'Hellaswag', 'Boolq', 'MMLU'])
        with col2:
            # Language spellings must match the Language values returned by the server.
            language_options = st.multiselect(
                'Pick Languages',
                ['kannada', 'hindi', 'tamil', 'telegu', 'gujarathi', 'marathi', 'malayalam'],
                ['kannada', 'hindi', 'tamil', 'telegu', 'gujarathi', 'marathi', 'malayalam'])

        if title:
            # Multiple exact-name queries can be separated with ';'; a single query
            # is treated as a case-insensitive substring match.
            if ';' in title:
                model_names = [name.strip() for name in title.split(';')]
                filtered_df = df[df['Model Name'].isin(model_names)]
            else:
                filtered_df = df[df['Model Name'].str.contains(title, case=False, na=False)]

            filtered_df = filtered_df[filtered_df['Language'].isin(language_options)]
            filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)].copy()

            # Recompute the average over the currently selected benchmarks.
            filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)

            st.dataframe(filtered_df, use_container_width=True)
        elif benchmark_options or language_options:
            filtered_df = df[df['Language'].isin(language_options)]
            filtered_df = filtered_df[df.columns.intersection(['Model Name', 'Language'] + benchmark_options)].copy()

            filtered_df['Average'] = filtered_df[benchmark_options].mean(axis=1)

            st.dataframe(filtered_df, use_container_width=True)

        compare_models = st.multiselect(
            'Pick Models to compare them',
            df['Model Name'].unique()
        )

        if compare_models:
            compare_data = []
            for model in compare_models:
                model_data = df[df['Model Name'] == model]
                compare_data.append(model_data)
            if compare_data:
                compare_df = pd.concat(compare_data)
                compare_df['Average'] = compare_df[benchmark_options].mean(axis=1)
                st.dataframe(compare_df, use_container_width=True)

    with About_tab:
        st.markdown('''
### About Indic LLM Leaderboard

### Indic Eval

### Contribute
''')

    with FAQ_tab:
        st.markdown('''
### FAQ

### SUBMISSIONS

### RESULTS

### EDITING SUBMISSIONS

### OTHER
''')

    with Submit_tab:
        st.markdown('''
### Submit Your Model
''')

    with st.expander(label="Citation"):
        code = r'''
@misc{indic-llm-leaderboard,
    author = {Adithya S Kolavi},
    title = {Indic LLM Leaderboard},
    year = {2024},
    publisher = {Cognitivelab},
    howpublished = "\url{https://huggingface.co/spaces/Cognitive-Lab/indic_llm_leaderboard}",
}
'''
        st.code(code, language='latex')

if __name__ == "__main__":
    main()
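# To run locally (assuming this file is saved as app.py and a .env providing
# SERVER_URL sits alongside it):
#   pip install streamlit requests pandas plotly python-dotenv huggingface_hub
#   streamlit run app.py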