Commit
·
535d0d5
1
Parent(s):
657d04f
Update app
Browse files
- app.py +25 -19
- src/submission/submit.py +1 -0
app.py
CHANGED
|
@@ -30,29 +30,36 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN, QUEUE_RE
|
|
| 30 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
| 31 |
from src.submission.submit import add_new_eval
|
| 32 |
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def restart_space():
|
| 35 |
API.restart_space(repo_id=REPO_ID, token=TOKEN)
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
try:
|
| 39 |
-
print(EVAL_REQUESTS_PATH)
|
| 40 |
-
snapshot_download(
|
| 41 |
-
repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
| 42 |
-
)
|
| 43 |
-
except Exception:
|
| 44 |
-
restart_space()
|
| 45 |
-
try:
|
| 46 |
-
print(EVAL_RESULTS_PATH)
|
| 47 |
-
snapshot_download(
|
| 48 |
-
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
| 49 |
-
)
|
| 50 |
-
except Exception:
|
| 51 |
-
restart_space()
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
leaderboard_df = original_df.copy()
|
| 56 |
|
| 57 |
"""
|
| 58 |
(
|
|
@@ -149,7 +156,7 @@ with demo:
|
|
| 149 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 150 |
|
| 151 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 152 |
-
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
| 153 |
with gr.Row():
|
| 154 |
with gr.Column():
|
| 155 |
with gr.Row():
|
|
@@ -243,7 +250,6 @@ with demo:
|
|
| 243 |
leaderboard_table,
|
| 244 |
queue=True,
|
| 245 |
)
|
| 246 |
-
|
| 247 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 248 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 249 |
|
|
@@ -335,7 +341,7 @@ with demo:
|
|
| 335 |
model_name_textbox,
|
| 336 |
upload_button
|
| 337 |
],
|
| 338 |
-
submission_result,
|
| 339 |
)
|
| 340 |
|
| 341 |
with gr.Row():
|
|
|
|
| 30 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
| 31 |
from src.submission.submit import add_new_eval
|
| 32 |
|
| 33 |
+
original_df = None
|
| 34 |
+
leaderboard_df = None
|
| 35 |
|
| 36 |
def restart_space():
|
| 37 |
API.restart_space(repo_id=REPO_ID, token=TOKEN)
|
| 38 |
|
| 39 |
+
def download_data():
|
| 40 |
+
global original_df
|
| 41 |
+
global leaderboard_df
|
| 42 |
+
try:
|
| 43 |
+
print(EVAL_REQUESTS_PATH)
|
| 44 |
+
snapshot_download(
|
| 45 |
+
repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
| 46 |
+
)
|
| 47 |
+
except Exception:
|
| 48 |
+
restart_space()
|
| 49 |
+
try:
|
| 50 |
+
print(EVAL_RESULTS_PATH)
|
| 51 |
+
snapshot_download(
|
| 52 |
+
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
| 53 |
+
)
|
| 54 |
+
except Exception:
|
| 55 |
+
restart_space()
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
_, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
| 59 |
+
leaderboard_df = original_df.copy()
|
| 60 |
+
|
| 61 |
|
| 62 |
+
download_data()
|
|
|
|
| 63 |
|
| 64 |
"""
|
| 65 |
(
|
|
|
|
| 156 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 157 |
|
| 158 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 159 |
+
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0) as tb_board:
|
| 160 |
with gr.Row():
|
| 161 |
with gr.Column():
|
| 162 |
with gr.Row():
|
|
|
|
| 250 |
leaderboard_table,
|
| 251 |
queue=True,
|
| 252 |
)
|
|
|
|
| 253 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 254 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 255 |
|
|
|
|
| 341 |
model_name_textbox,
|
| 342 |
upload_button
|
| 343 |
],
|
| 344 |
+
[submission_result],
|
| 345 |
)
|
| 346 |
|
| 347 |
with gr.Row():
|
src/submission/submit.py
CHANGED
|
@@ -2,6 +2,7 @@ import json
|
|
| 2 |
import os
|
| 3 |
from datetime import datetime, timezone
|
| 4 |
import numpy as np
|
|
|
|
| 5 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
| 6 |
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO
|
| 7 |
from src.submission.check_validity import (
|
|
|
|
| 2 |
import os
|
| 3 |
from datetime import datetime, timezone
|
| 4 |
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
| 7 |
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO
|
| 8 |
from src.submission.check_validity import (
|