|
import streamlit as st |
|
import pandas as pd |
|
from utils.style import style_zero_context |
|
|
|
@st.cache_data
def load_data():
    """Load the zero-context leaderboard table from disk.

    Reads ``data/zero_context.csv`` into a DataFrame and removes the
    ``"Row Color"`` column when the file contains one. The function is
    wrapped in ``st.cache_data``.

    Returns:
        The leaderboard DataFrame without a ``"Row Color"`` column.
    """
    leaderboard = pd.read_csv("data/zero_context.csv")
    # errors="ignore" makes the drop a no-op when the column is absent,
    # which replaces an explicit membership check.
    return leaderboard.drop(columns=["Row Color"], errors="ignore")
|
|
|
def show():
    """Render the "Zero Noise Leaderboard" page.

    Writes the page title, loads the leaderboard via ``load_data()``,
    passes it through ``style_zero_context`` and emits the result with
    ``st.markdown(..., unsafe_allow_html=True)``, then renders a markdown
    legend describing the row colors and the benchmark.
    """
    st.title("Zero Noise Leaderboard")

    # NOTE(review): style_zero_context presumably returns an HTML string,
    # since its result is rendered with unsafe_allow_html -- confirm in
    # utils.style.
    leaderboard_html = style_zero_context(load_data())
    st.markdown(leaderboard_html, unsafe_allow_html=True)

    # Legend shown below the table: color key plus benchmark notes.
    legend_markdown = """
    **Colors**:
    - Yellow: reasoning model
    - Green: linear attention hybrid model
    - Blue: SSM-hybrid model

    **Benchmark Details**:
    - Evaluated on Symbolic, Medium, and Hard subtasks.
    - Area Under Curve(AUC) Metrics is Used to Compare between LLM Performance.
    - AUC is calculated using np.trapz function.
    """
    st.markdown(legend_markdown)