Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
cache-management (#959)
Browse files
- Force redownload new contents dataset (35850da85b6769eff642ee65db77be9bee902e2e)
- Fix comment string (2e50140dc360bd5a0ad9a44f8828d2d36e0201ad)
app.py
CHANGED
|
@@ -60,8 +60,12 @@ NEW_DATA_ON_LEADERBOARD = True
|
|
| 60 |
LEADERBOARD_DF = None
|
| 61 |
|
| 62 |
def restart_space():
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def time_diff_wrapper(func):
|
| 67 |
def wrapper(*args, **kwargs):
|
|
@@ -129,6 +133,9 @@ def get_latest_data_queue():
|
|
| 129 |
|
| 130 |
def init_space():
|
| 131 |
"""Initializes the application space, loading only necessary data."""
|
|
|
|
|
|
|
|
|
|
| 132 |
if DO_FULL_INIT:
|
| 133 |
# These downloads only occur on full initialization
|
| 134 |
try:
|
|
@@ -443,13 +450,16 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
|
|
| 443 |
# Add webhooks
|
| 444 |
@webhooks_server.add_webhook
|
| 445 |
def update_leaderboard(payload: WebhookPayload) -> None:
|
| 446 |
-
"""Redownloads the leaderboard dataset each time it updates"""
|
| 447 |
if payload.repo.type == "dataset" and payload.event.action == "update":
|
| 448 |
global NEW_DATA_ON_LEADERBOARD
|
| 449 |
if NEW_DATA_ON_LEADERBOARD:
|
|
|
|
| 450 |
return
|
|
|
|
| 451 |
NEW_DATA_ON_LEADERBOARD = True
|
| 452 |
|
|
|
|
| 453 |
datasets.load_dataset(
|
| 454 |
AGGREGATED_REPO,
|
| 455 |
"default",
|
|
@@ -458,6 +468,7 @@ def update_leaderboard(payload: WebhookPayload) -> None:
|
|
| 458 |
download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
|
| 459 |
verification_mode="no_checks"
|
| 460 |
)
|
|
|
|
| 461 |
|
| 462 |
# The below code is not used at the moment, as we can manage the queue file locally
|
| 463 |
LAST_UPDATE_QUEUE = datetime.datetime.now()
|
|
@@ -477,5 +488,6 @@ def update_queue(payload: WebhookPayload) -> None:
|
|
| 477 |
webhooks_server.launch()
|
| 478 |
|
| 479 |
scheduler = BackgroundScheduler()
|
| 480 |
-
scheduler.add_job(restart_space, "interval", hours=
|
|
|
|
| 481 |
scheduler.start()
|
|
|
|
| 60 |
LEADERBOARD_DF = None
|
| 61 |
|
| 62 |
def restart_space():
|
| 63 |
+
try:
|
| 64 |
+
logging.info(f"Attempting to restart space with repo ID: {REPO_ID}")
|
| 65 |
+
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
| 66 |
+
logging.info("Space restarted successfully.")
|
| 67 |
+
except Exception as e:
|
| 68 |
+
logging.error(f"Failed to restart space: {e}")
|
| 69 |
|
| 70 |
def time_diff_wrapper(func):
|
| 71 |
def wrapper(*args, **kwargs):
|
|
|
|
| 133 |
|
| 134 |
def init_space():
|
| 135 |
"""Initializes the application space, loading only necessary data."""
|
| 136 |
+
global NEW_DATA_ON_LEADERBOARD
|
| 137 |
+
NEW_DATA_ON_LEADERBOARD = True # Ensure new data is always pulled on restart
|
| 138 |
+
|
| 139 |
if DO_FULL_INIT:
|
| 140 |
# These downloads only occur on full initialization
|
| 141 |
try:
|
|
|
|
| 450 |
# Add webhooks
|
| 451 |
@webhooks_server.add_webhook
|
| 452 |
def update_leaderboard(payload: WebhookPayload) -> None:
|
| 453 |
+
"""Redownloads the leaderboard dataset each time it updates."""
|
| 454 |
if payload.repo.type == "dataset" and payload.event.action == "update":
|
| 455 |
global NEW_DATA_ON_LEADERBOARD
|
| 456 |
if NEW_DATA_ON_LEADERBOARD:
|
| 457 |
+
logging.info("Leaderboard data is already marked for update, skipping...")
|
| 458 |
return
|
| 459 |
+
logging.info("New data detected, downloading updated leaderboard dataset.")
|
| 460 |
NEW_DATA_ON_LEADERBOARD = True
|
| 461 |
|
| 462 |
+
# Download the latest version of the dataset
|
| 463 |
datasets.load_dataset(
|
| 464 |
AGGREGATED_REPO,
|
| 465 |
"default",
|
|
|
|
| 468 |
download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
|
| 469 |
verification_mode="no_checks"
|
| 470 |
)
|
| 471 |
+
logging.info("Leaderboard dataset successfully downloaded.")
|
| 472 |
|
| 473 |
# The below code is not used at the moment, as we can manage the queue file locally
|
| 474 |
LAST_UPDATE_QUEUE = datetime.datetime.now()
|
|
|
|
| 488 |
webhooks_server.launch()
|
| 489 |
|
| 490 |
scheduler = BackgroundScheduler()
|
| 491 |
+
scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
|
| 492 |
+
logging.info("Scheduler initialized to restart space every 1 hour.")
|
| 493 |
scheduler.start()
|