atlas5301 committed
Commit 064c454 · 1 Parent(s): 9648ca4

improve links and style

data/long_context.csv CHANGED
@@ -1,11 +1,11 @@
- Model,8K,16K,32K,Average
- gemini-1.5-pro-002,1182.43,896.31,812.96,963.9
- qwen-2.5-72b-instruct,927.33,681.53,563.65,724.17
- mistral-large-2411,914.49,563.73,319.21,599.14
- deepseek-v3,935.10,477.02,313.66,575.2
- gemini-1.5-flash-002,673.88,476.72,377.38,509.3
- llama-3.1-70b-instruct,479.00,394.50,355.5,409.67
- minimax-text-01,481.32,359.56,325.95,388.94
- gpt-4o-mini,401.00,337.81,275.63,338.15
- qwen-2.5-7b-instruct,248.00,211.50,196.17,218.56
- llama-3.1-8b-instruct,183.67,149.50,109.45,147.54
+ Model,8K,16K,32K,Average↑,Link
+ gemini-1.5-pro-002,1182.43,896.31,812.96,963.9,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-002
+ qwen-2.5-72b-instruct,927.33,681.53,563.65,724.17,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct
+ mistral-large-2411,914.49,563.73,319.21,599.14,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
+ deepseek-v3,935.10,477.02,313.66,575.2,https://huggingface.co/deepseek-ai/DeepSeek-V3
+ gemini-1.5-flash-002,673.88,476.72,377.38,509.3,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-002
+ llama-3.1-70b-instruct,479.00,394.50,355.5,409.67,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+ minimax-text-01,481.32,359.56,325.95,388.94,https://huggingface.co/MiniMaxAI/MiniMax-Text-01
+ gpt-4o-mini,401.00,337.81,275.63,338.15,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/
+ qwen-2.5-7b-instruct,248.00,211.50,196.17,218.56,https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
+ llama-3.1-8b-instruct,183.67,149.50,109.45,147.54,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
data/zero_context.csv CHANGED
@@ -1,19 +1,19 @@
- Model,Row Color,Symbolic,Medium,Hard,1st<50% op,1st<10% op,Avg. Acc op≤30,Average
- deepseek-r1,yellow,7280.0,9750.85,8573.8,100,130,0.9427,8534.88
- o1-mini,yellow,5060.0,6054.91,3738.43,50,90,0.8397,4951.11
- deepseek-v3,None,4310.0,4100.81,2407.86,24,55,0.6669,3606.22
- qwq-32b-preview,yellow,3530.0,3205.75,1846.19,21,50,0.5403,2860.65
- gemini-1.5-pro-002,None,2547.0,3659.59,2318.28,26,45,0.6924,2841.62
- claude-3.5-sonnet,None,2161.0,3281.8,2115.79,26,40,0.6758,2519.53
- mistral-large-2411,None,2332.5,2879.92,2310.49,24,50,0.6645,2507.64
- qwen-2.5-72b-instruct,None,2048.0,2496.81,2016.38,21,40,0.5433,2187.06
- gpt-4o-2024-11-20,None,2379.0,2457.37,1451.54,18,30,0.5064,2095.97
- gemini-1.5-flash-002,None,1970.0,1478.75,1274.25,13,30,0.4460,1574.33
- llama-3.1-70b-instruct,None,1769.0,1650.25,1205.25,15,30,0.4314,1541.50
- minimax-text-01,green,1618.5,1712.64,1178.51,14,30,0.4213,1503.22
- llama-3.1-405b-instruct,None,1557.0,1321.54,950.0,11,20,0.3409,1276.18
- gpt-4o-mini,None,1389.0,1406.5,913.89,12,22,0.3094,1236.46
- claude-3.5-haiku,None,897.0,1053.16,784.34,10,22,0.2910,911.50
- qwen-2.5-7b-instruct,None,786.95,886.75,618.5,7,19,0.2257,764.07
- llama-3.1-8b-instruct,None,462.0,786.5,606.5,6,17,0.2186,618.30
- jamba-1.5-large,blue,856.0,485.13,466.4,6,26,0.1828,602.51
+ Model,Row Color,Symbolic,Medium,Hard,1st<50% op,1st<10% op,Avg. Acc op≤30,Average↑,Link
+ deepseek-r1,yellow,7280.0,9750.85,8573.8,100,130,0.9427,8534.88,https://huggingface.co/deepseek-ai/DeepSeek-V3
+ o1-mini,yellow,5060.0,6054.91,3738.43,50,90,0.8397,4951.11,https://platform.openai.com/docs/models/o1
+ deepseek-v3,None,4310.0,4100.81,2407.86,24,55,0.6669,3606.22,https://huggingface.co/deepseek-ai/DeepSeek-V3
+ qwq-32b-preview,yellow,3530.0,3205.75,1846.19,21,50,0.5403,2860.65,https://huggingface.co/Qwen/QwQ-32B-Preview
+ gemini-1.5-pro-002,None,2547.0,3659.59,2318.28,26,45,0.6924,2841.62,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-pro-002
+ claude-3.5-sonnet,None,2161.0,3281.8,2115.79,26,40,0.6758,2519.53,https://www.anthropic.com/news/3-5-models-and-computer-use
+ mistral-large-2411,None,2332.5,2879.92,2310.49,24,50,0.6645,2507.64,https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
+ qwen-2.5-72b-instruct,None,2048.0,2496.81,2016.38,21,40,0.5433,2187.06,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct
+ gpt-4o-2024-11-20,None,2379.0,2457.37,1451.54,18,30,0.5064,2095.97,https://platform.openai.com/docs/models/gpt-4o#gpt-4o
+ gemini-1.5-flash-002,None,1970.0,1478.75,1274.25,13,30,0.4460,1574.33,https://aistudio.google.com/app/prompts/new_chat?model=gemini-1.5-flash-002
+ llama-3.1-70b-instruct,None,1769.0,1650.25,1205.25,15,30,0.4314,1541.50,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct
+ minimax-text-01,green,1618.5,1712.64,1178.51,14,30,0.4213,1503.22,https://huggingface.co/MiniMaxAI/MiniMax-Text-01
+ llama-3.1-405b-instruct,None,1557.0,1321.54,950.0,11,20,0.3409,1276.18,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct
+ gpt-4o-mini,None,1389.0,1406.5,913.89,12,22,0.3094,1236.46,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/
+ claude-3.5-haiku,None,897.0,1053.16,784.34,10,22,0.2910,911.50,https://www.anthropic.com/news/3-5-models-and-computer-use
+ qwen-2.5-7b-instruct,None,786.95,886.75,618.5,7,19,0.2257,764.07,https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
+ llama-3.1-8b-instruct,None,462.0,786.5,606.5,6,17,0.2186,618.30,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct
+ jamba-1.5-large,blue,856.0,485.13,466.4,6,26,0.1828,602.51,https://huggingface.co/ai21labs/AI21-Jamba-1.5-Large
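
Both CSVs now rename the sort column to Average↑ and carry a trailing Link column alongside the scores. As a quick illustration (not part of this commit), the files still load cleanly with plain pandas before any styling is applied:

import pandas as pd

# Illustrative check only: confirm the new columns exist and every row has a link.
for path in ("data/long_context.csv", "data/zero_context.csv"):
    df = pd.read_csv(path)
    assert "Link" in df.columns and "Average↑" in df.columns
    assert df["Link"].notna().all()
    # leaderboard order is still recoverable straight from the CSV
    print(path, df.sort_values("Average↑", ascending=False)["Model"].iloc[0])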
pages/long_context.py CHANGED
@@ -10,28 +10,14 @@ def load_data():
 
 def show():
     st.title("Long Context Leaderboard")
-
     # Load and style data
     df = load_data()
     styled_df = style_long_context(df)
 
-    # Display the dataframe with built-in sort on column click
-    st.dataframe(
-        styled_df,
-        use_container_width=True,
-        height=35*(len(df)+1),
-        hide_index=True,
-        column_config={
-            "Model": st.column_config.TextColumn(width="large"),
-            "8K": st.column_config.NumberColumn(format="%.2f"),
-            "16K": st.column_config.NumberColumn(format="%.2f"),
-            "32K": st.column_config.NumberColumn(format="%.2f"),
-            "Average↑": st.column_config.NumberColumn(
-                format="%.2f",
-                help="Average across all context lengths"
-            )
-        }
-    )
+    st.markdown(styled_df, unsafe_allow_html=True)  # No need to call to_html() again
+    # st.dataframe(styled_df, use_container_width=True)
+
+    # st.html(styled_df)
 
     # Optionally, keep some explanatory text
     st.markdown("""
pages/zero_noise.py CHANGED
@@ -16,27 +16,7 @@ def show():
 
     # Remove the manual sorting UI (selectbox, checkboxes) and let st.dataframe handle sorting.
     styled_df = style_zero_context(raw_df)
-
-    # Directly show the dataframe
-    st.dataframe(
-        styled_df,
-        use_container_width=True,
-        hide_index=True,
-        height=35*(1+len(raw_df)),
-        column_config={
-            "Model": st.column_config.TextColumn(width="large"),
-            "Symbolic": st.column_config.NumberColumn(format="%.2f"),
-            "Medium": st.column_config.NumberColumn(format="%.2f"),
-            "Hard": st.column_config.NumberColumn(format="%.2f"),
-            "1st<50% op": st.column_config.NumberColumn(format="%.0f"),
-            "1st<10% op": st.column_config.NumberColumn(format="%.0f"),
-            "Avg. Acc op≤30": st.column_config.NumberColumn(format="%.4f"),
-            "Average↑": st.column_config.NumberColumn(
-                format="%.2f",
-                help="Average across all subsets"
-            )
-        }
-    )
+    st.markdown(styled_df, unsafe_allow_html=True)  # No need to call to_html() again
 
     # You can leave your explanation/description below
     st.markdown("""
utils/style.py CHANGED
@@ -1,11 +1,12 @@
 import pandas as pd
 import streamlit as st
+import re
 # Define color maps for both light and dark modes
 COLOR_MAP = {
     "light": {
-        "yellow": "background-color: rgba(255, 255, 204, 0.5)",  # Reasoning models
-        "green": "background-color: rgba(227, 251, 233, 0.5)",   # Linear attention hybrid
-        "blue": "background-color: rgba(230, 244, 255, 0.5)"     # SSM hybrid models
+        "yellow": "background-color: rgba(255, 255, 128, 0.3)",  # Reasoning models
+        "green": "background-color: rgba(192, 255, 192, 0.3)",   # Linear attention hybrid
+        "blue": "background-color: rgba(192, 192, 255, 0.3)"     # SSM hybrid models
     },
 }
 
@@ -31,8 +32,12 @@ def style_zero_context(df):
         # Add any other special-cased models here
         # "o1-mini": COLOR_MAP["yellow"], etc.
     }
+    # Add links to model names
+    df["Model"] = df.apply(lambda row: f'<a href="{row["Link"]}" target="_blank">{row["Model"]}</a>', axis=1)
+    df.drop(columns=["Link"], inplace=True)
+
     styler = df.style.apply(
-        lambda row: [color_mapping.get(row["Model"], "")]*len(row),
+        lambda row: [color_mapping.get(re.sub(r'<[^>]+>', '', row["Model"]), "")]*len(row),
         axis=1
     )
 
@@ -57,6 +62,19 @@ def style_zero_context(df):
         "Average↑": "{:,.2f}"  # Format as number with thousands separator and 2 decimal places
     })
 
+    html = styler.to_html(escape=False, index=False)
+
+    # Updated regex: target model name *before* link replacement and use word boundary
+    for model, style in color_mapping.items():
+        html = re.sub(rf'<tr[^>]*>\s*<td[^>]*>{re.escape(model)}<', rf'<tr style="{style}"><td>', html, re.M)
+
+    html = re.sub(
+        r'<table(.*?)>',
+        r'<table\1 style="width:100%; border-collapse:collapse;">',
+        html
+    )
+    return html  # Return the modified HTML
+
 
     return styler
     # Add styling for model types
@@ -65,13 +83,31 @@
         "minimax-text-01": get_color_map()["green"],
         "jamba-1.5-large": get_color_map()["blue"]
     }
+
+    df["Model"] = df.apply(lambda row: f'<a href="{row["Link"]}" target="_blank">{row["Model"]}</a>', axis=1)
+    df.drop(columns=["Link"], inplace=True)
 
-    return df.style.apply(
-        lambda row: [color_mapping.get(row["Model"], "")]*len(row),
+    styled_df = df.style.apply(
+        lambda row: [color_mapping.get(re.sub(r'<[^>]+>', '', row["Model"]), "")]*len(row),
         axis=1
     ).format({
         "8K": "{:,.2f}",
         "16K": "{:,.2f}",
         "32K": "{:,.2f}",
-        "Average↑": "{:,.2f}"
-    })
+        "Average↑": "{:,.2f}",
+    })
+
+    # Convert to HTML and add <a> tags
+    html = styled_df.to_html(escape=False, index=False)
+
+    # Updated regex: target model name *before* link replacement and use word boundary
+    for model, style in color_mapping.items():
+        html = re.sub(rf'<tr[^>]*>\s*<td[^>]*>{re.escape(model)}<', rf'<tr style="{style}"><td>', html, re.M)
+
+    html = re.sub(
+        r'<table(.*?)>',
+        r'<table\1 style="width:100%; border-collapse:collapse;">',
+        html
+    )
+
+    return html  # Return the modified HTML
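
Taken together, these changes replace Streamlit's built-in dataframe widget with pre-rendered HTML: the styling helpers wrap each model name in an anchor tag built from the new Link column, drop that column, and return an HTML table string, which the pages then pass to st.markdown with unsafe_allow_html=True (st.dataframe would display the anchor markup as literal text rather than a clickable link). A minimal standalone sketch of the resulting flow, assuming the module layout shown in this commit:

import pandas as pd
import streamlit as st

from utils.style import style_long_context  # repo helper changed in this commit

# Mirrors pages/long_context.py after this commit (simplified, illustrative only).
df = pd.read_csv("data/long_context.csv")   # includes the new Link column
html = style_long_context(df)               # returns an HTML table with <a> links and row colors
st.markdown(html, unsafe_allow_html=True)   # renders the model links as clickable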