update scripts
- app.py +42 -1
- src/display/about.py +2 -1
app.py
CHANGED
@@ -34,12 +34,53 @@ def restart_space():
     csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results.csv'
     df_m3exam, df_mmlu, df_avg = load_data(csv_path)
 
+# Searching and filtering
+def update_table(
+    hidden_df: pd.DataFrame,
+    # columns: list,
+    # type_query: list,
+    # precision_query: str,
+    # size_query: list,
+    # show_deleted: bool,
+    query: str,
+):
+    # filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
+    # filtered_df = filter_queries(query, filtered_df)
+    # df = select_columns(filtered_df, columns)
+    df = filter_queries(query, hidden_df)
+    return df
+
+def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
+    return df[(df['Model'].str.contains(query, case=False))]
+
+def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
+    final_df = []
+    if query != "":
+        queries = [q.strip() for q in query.split(";")]
+        for _q in queries:
+            _q = _q.strip()
+            if _q != "":
+                temp_filtered_df = search_table(df, _q)
+                if len(temp_filtered_df) > 0:
+                    final_df.append(temp_filtered_df)
+        if len(final_df) > 0:
+            filtered_df = pd.concat(final_df)
+
+    return filtered_df
+
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
-
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
+            with gr.Row():
+                search_bar = gr.Textbox(
+                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                    show_label=False,
+                    elem_id="search-bar",
+                )
+
             leaderboard_table = gr.components.Dataframe(
                 value=df_avg,
                 # value=leaderboard_df[
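The hunk above adds the filtering helpers and the search box but ends before the event wiring that connects them. A minimal sketch of how `update_table` could be hooked up inside the same `with gr.TabItem(...)` block is shown below; the `hidden_leaderboard_table` component and the `search_bar.submit(...)` call are illustrative assumptions, not part of this commit.

    # Sketch only (not in this diff): wiring the search bar to the table.
    # `hidden_leaderboard_table` is a hypothetical hidden copy of the full
    # dataframe, so every query filters the complete data rather than an
    # already-filtered view.
    hidden_leaderboard_table = gr.components.Dataframe(value=df_avg, visible=False)

    search_bar.submit(
        update_table,                            # applies filter_queries to the hidden copy
        [hidden_leaderboard_table, search_bar],  # inputs: full dataframe + query string
        leaderboard_table,                       # output: the visible leaderboard
    )

For reference, `filter_queries` splits the query on `;` and unions the case-insensitive substring matches on the `Model` column. A standalone toy example of the equivalent pandas behaviour (model names and scores are illustrative only, not real results):

    import pandas as pd

    df = pd.DataFrame({
        "Model": ["Llama-2-7b", "Mistral-7B", "SeaLLM-7B-v2"],
        "Avg": [50.1, 55.3, 60.2],  # placeholder numbers for the example
    })

    # filter_queries("llama; sea", df) concatenates the per-term matches:
    hits = pd.concat(
        df[df["Model"].str.contains(q.strip(), case=False)]
        for q in "llama; sea".split(";")
        if q.strip()
    )
    print(hits["Model"].tolist())  # ['Llama-2-7b', 'SeaLLM-7B-v2']

Two edge cases in the committed `filter_queries` may be worth a follow-up: a row matching several terms appears once per match, and an empty query (or one with no matches) leaves `filtered_df` unassigned, so the function raises `UnboundLocalError` instead of returning the unfiltered table.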
src/display/about.py
CHANGED
@@ -20,7 +20,8 @@ TITLE = """<h1 align="center" id="space-title">SeaExam Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
+🟢: pre-trained
+🔶: fine-tuned
 """
 
 # Which evaluations are you running? how can people reproduce what you have?