Update app.py
Browse files
app.py
CHANGED
@@ -38,7 +38,7 @@ def filter_data(model, scenario):
|
|
38 |
return pd.DataFrame([pass_at_k])
|
39 |
|
40 |
# Initialize the leaderboard
|
41 |
-
def init_leaderboard(dataframe, default_selection=["Model", "pass@1", "pass@5", "pass@10"], height=600):
|
42 |
if dataframe is None or dataframe.empty:
|
43 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
44 |
return Leaderboard(
|
@@ -67,10 +67,8 @@ duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(la
|
|
67 |
}, index=['pass@1'])).reset_index()
|
68 |
|
69 |
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
70 |
-
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
|
71 |
-
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
72 |
-
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
73 |
-
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
74 |
|
75 |
with gr.Blocks() as demo:
|
76 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
@@ -82,7 +80,7 @@ with gr.Blocks() as demo:
|
|
82 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
|
83 |
|
84 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
85 |
-
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = [
|
86 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
87 |
leaderboard = init_leaderboard(complete_pass_at_k, default_selection = [], height=800)
|
88 |
|
|
|
38 |
return pd.DataFrame([pass_at_k])
|
39 |
|
40 |
# Initialize the leaderboard
|
41 |
+
def init_leaderboard(dataframe, default_selection=["Model", "pass@1"], height=600):
|
42 |
if dataframe is None or dataframe.empty:
|
43 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
44 |
return Leaderboard(
|
|
|
67 |
}, index=['pass@1'])).reset_index()
|
68 |
|
69 |
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
70 |
+
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
|
71 |
+
}, index=['pass@1'])).reset_index()
|
|
|
|
|
72 |
|
73 |
with gr.Blocks() as demo:
|
74 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
|
|
80 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
|
81 |
|
82 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
83 |
+
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = [], height=400)
|
84 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
85 |
leaderboard = init_leaderboard(complete_pass_at_k, default_selection = [], height=800)
|
86 |
|