| import streamlit as st | |
| from draw_utils import PAGE_MARKDOWN, PAGE_INFO, LENGTHS | |
| from draw_utils import load_results, style_dataframe | |
# Page-level setup, executed at import time: configure the browser tab title
# and a wide layout, then inject the shared markup from draw_utils.
# NOTE(review): set_page_config is kept ahead of every other st.* call;
# Streamlit's docs describe ordering constraints for it -- confirm before
# moving these lines.
st.set_page_config(
    page_title="Leaderboard App",
    layout="wide",
)

# PAGE_MARKDOWN is rendered as raw HTML (unsafe_allow_html=True), so it must
# remain a trusted, project-defined constant.
st.markdown(
    PAGE_MARKDOWN,
    unsafe_allow_html=True,
)
def _filter_by_model_name(task_df, search_term):
    """Return the rows of *task_df* whose ``model_name`` contains *search_term*.

    Matching is a case-insensitive, literal substring test. The term comes
    straight from a free-text input, so it is deliberately NOT interpreted as
    a regular expression: characters such as ``(``, ``+`` or ``[`` would
    otherwise raise ``re.error``, and ``.`` would act as a wildcard. Rows with
    a missing ``model_name`` are treated as non-matching (``na=False``) so the
    boolean mask never contains NA values.

    An empty *search_term* returns *task_df* unchanged.
    """
    if not search_term:
        return task_df
    mask = task_df["model_name"].str.contains(
        search_term,
        case=False,
        regex=False,
        na=False,
    )
    return task_df[mask]


def draw_leaderboard():
    """Render the leaderboard page: title, intro text, search box and tabs.

    Results are loaded once via ``load_results()``. One tab is created per
    task label (``avg`` followed by ``qa1`` .. ``qa10``); each tab shows the
    rows whose ``task`` column equals that label, restricted to the model
    name, the two aggregate columns and the ``LENGTHS`` columns. The search
    box filters every tab by model name.

    NOTE(review): the source this was recovered from had lost its
    indentation; the index reset is assumed to apply to every tab, not only
    when a search term is present -- confirm against the original file.
    """
    df = load_results()
    tasks = ["avg"] + [f"qa{i}" for i in range(1, 11)]
    columns = ["model_name", "avg(32k)", "avg(128k)"] + LENGTHS

    # NOTE(review): the leading/trailing glyphs in this title look like
    # mis-decoded emoji. They are kept byte-for-byte here because the intended
    # characters cannot be recovered from this file alone.
    st.title("πππͺ‘πβ BABILong Leaderboard π")
    st.markdown(PAGE_INFO)
    st.subheader("Average Accuracy")
    search_term = st.text_input("Search models:", "")

    # Pixel height allotted per table row; loop-invariant, so defined once.
    row_height = 35

    # The task labels are already strings, so they double as tab titles.
    for tab, task in zip(st.tabs(tasks), tasks):
        with tab:
            task_df = df[df.task == task][columns]
            task_df = _filter_by_model_name(task_df, search_term)
            # Reassign instead of mutating in place: task_df is derived from
            # a slice of df, and a fresh 0..n-1 index is wanted for display.
            task_df = task_df.reset_index(drop=True)

            # One extra row on top of the data rows (presumably the header)
            # so the table is sized to its content.
            height = (len(task_df) + 1) * row_height
            styled = style_dataframe(task_df).format(precision=2)
            st.dataframe(
                styled,
                use_container_width=True,
                height=height,
            )
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if "__main__" == __name__:
    draw_leaderboard()