tfrere committed on
Commit
c13a7ef
·
1 Parent(s): 54729e7

add webhook to fetch data properly

Browse files
Files changed (2) hide show
  1. server/.env.example +1 -1
  2. server/server.py +60 -13
server/.env.example CHANGED
@@ -4,7 +4,7 @@ HUGGING_FACE_HUB_TOKEN=your_token_here
4
 
5
  # Repository ID for storing leaderboard data (required)
6
  # Format: username/repo-name
7
- HUGGING_FACE_STORAGE_REPO=tfrere/leaderboard-explorer
8
 
9
  # File path in the repository (required)
10
  HUGGING_FACE_STORAGE_FILE_PATH=final_leaderboards.json
 
4
 
5
  # Repository ID for storing leaderboard data (required)
6
  # Format: username/repo-name
7
+ HUGGING_FACE_STORAGE_REPO=leaderboard-explorer/leaderboard_explorer
8
 
9
  # File path in the repository (required)
10
  HUGGING_FACE_STORAGE_FILE_PATH=final_leaderboards.json
server/server.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.staticfiles import StaticFiles
4
  from apscheduler.schedulers.background import BackgroundScheduler
@@ -7,6 +7,13 @@ import os
7
  from dotenv import load_dotenv
8
  from huggingface_hub import HfApi
9
  import json
 
 
 
 
 
 
 
10
 
11
  # Load environment variables
12
  load_dotenv()
@@ -49,31 +56,41 @@ hf_api = HfApi(token=HF_TOKEN)
49
  def fetch_leaderboards():
50
  """Fetch leaderboards data from Hugging Face"""
51
  try:
52
- # Download the JSON file directly
 
53
  json_path = hf_api.hf_hub_download(
54
  repo_id=REPO_ID,
55
  filename=FILE_PATH,
56
- repo_type="dataset"
 
 
57
  )
58
 
 
 
59
  with open(json_path, 'r') as f:
60
- cache["data"] = json.load(f)
 
 
61
  cache["last_updated"] = datetime.now()
62
- print(f"Cache updated at {cache['last_updated']}")
 
 
 
 
 
63
 
64
  except Exception as e:
65
- print(f"Error fetching data: {str(e)}")
66
  if not cache["data"]: # Only raise if we don't have any cached data
67
  raise HTTPException(status_code=500, detail="Failed to fetch leaderboards data")
68
 
69
- # Initialize scheduler
70
- scheduler = BackgroundScheduler()
71
- scheduler.add_job(fetch_leaderboards, 'interval', minutes=CACHE_DURATION_MINUTES)
72
- scheduler.start()
73
-
74
  # Initial fetch
75
  fetch_leaderboards()
76
 
 
 
 
77
  @app.get("/api/leaderboards")
78
  async def get_leaderboards():
79
  """Get leaderboards data from cache"""
@@ -94,8 +111,38 @@ async def health_check():
94
  "last_updated": cache["last_updated"].isoformat() if cache["last_updated"] else None
95
  }
96
 
97
- # Mount static files
98
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  if __name__ == "__main__":
101
  import uvicorn
 
1
+ from fastapi import FastAPI, HTTPException, Request
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.staticfiles import StaticFiles
4
  from apscheduler.schedulers.background import BackgroundScheduler
 
7
  from dotenv import load_dotenv
8
  from huggingface_hub import HfApi
9
  import json
10
+ import logging
11
+
12
+ # Configure logging
13
+ logging.basicConfig(
14
+ level=logging.INFO,
15
+ format='%(asctime)s - %(levelname)s - %(message)s'
16
+ )
17
 
18
  # Load environment variables
19
  load_dotenv()
 
56
  def fetch_leaderboards():
57
  """Fetch leaderboards data from Hugging Face"""
58
  try:
59
+ logging.info(f"Fetching leaderboards from {REPO_ID}/{FILE_PATH}")
60
+ # Download the JSON file directly with force_download to ensure we get the latest version
61
  json_path = hf_api.hf_hub_download(
62
  repo_id=REPO_ID,
63
  filename=FILE_PATH,
64
+ repo_type="dataset",
65
+ force_download=True, # Force download to ensure we get the latest version
66
+ force_filename="leaderboards_latest.json" # Force a specific filename to avoid caching issues
67
  )
68
 
69
+ logging.info(f"File downloaded to: {json_path}")
70
+
71
  with open(json_path, 'r') as f:
72
+ new_data = json.load(f)
73
+ old_data = cache["data"]
74
+ cache["data"] = new_data
75
  cache["last_updated"] = datetime.now()
76
+
77
+ # Log the differences
78
+ old_len = len(old_data) if old_data and isinstance(old_data, list) else 0
79
+ new_len = len(new_data) if isinstance(new_data, list) else 0
80
+ logging.info(f"Cache updated: Old entries: {old_len}, New entries: {new_len}")
81
+ logging.info(f"Cache update timestamp: {cache['last_updated']}")
82
 
83
  except Exception as e:
84
+ logging.error(f"Error fetching data: {str(e)}", exc_info=True)
85
  if not cache["data"]: # Only raise if we don't have any cached data
86
  raise HTTPException(status_code=500, detail="Failed to fetch leaderboards data")
87
 
 
 
 
 
 
88
  # Initial fetch
89
  fetch_leaderboards()
90
 
91
+ # Mount static files
92
+ app.mount("/static", StaticFiles(directory="static", html=True), name="static")
93
+
94
  @app.get("/api/leaderboards")
95
  async def get_leaderboards():
96
  """Get leaderboards data from cache"""
 
111
  "last_updated": cache["last_updated"].isoformat() if cache["last_updated"] else None
112
  }
113
 
114
+ @app.post("/api/webhook")
115
+ async def handle_webhook(request: Request):
116
+ """Handle webhook notifications from Hugging Face Hub"""
117
+ try:
118
+ body = await request.json()
119
+ logging.info(f"Received webhook with payload: {body}")
120
+
121
+ # Get the event details
122
+ event = body.get("event", {})
123
+
124
+ # Verify if it's a relevant update (repo content update)
125
+ if event.get("action") == "update" and event.get("scope") == "repo.content":
126
+ try:
127
+ logging.info(f"Dataset update detected for repo {REPO_ID}, file {FILE_PATH}")
128
+ # Force a clean fetch
129
+ fetch_leaderboards()
130
+ if cache["last_updated"]:
131
+ logging.info(f"Cache successfully updated at {cache['last_updated']}")
132
+ return {"status": "success", "message": "Cache updated"}
133
+ else:
134
+ logging.error("Cache update failed: last_updated is None")
135
+ return {"status": "error", "message": "Cache update failed"}
136
+ except Exception as fetch_error:
137
+ logging.error(f"Error during fetch_leaderboards: {str(fetch_error)}", exc_info=True)
138
+ return {"status": "error", "message": f"Failed to update cache: {str(fetch_error)}"}
139
+
140
+ logging.info(f"Ignoring webhook event: action={event.get('action')}, scope={event.get('scope')}")
141
+ return {"status": "ignored", "message": "Event type not relevant"}
142
+
143
+ except Exception as e:
144
+ logging.error(f"Error processing webhook: {str(e)}", exc_info=True)
145
+ raise HTTPException(status_code=500, detail=f"Failed to process webhook: {str(e)}")
146
 
147
  if __name__ == "__main__":
148
  import uvicorn