Quazim0t0 committed on
Commit
bc94cad
·
verified ·
1 Parent(s): d1078a3

Update database_schema.py

Browse files
Files changed (1) hide show
  1. database_schema.py +181 -97
database_schema.py CHANGED
@@ -8,35 +8,70 @@ which integrates benchmark selection, model evaluation, and leaderboard function
8
  import sqlite3
9
  import os
10
  import json
 
11
  from datetime import datetime, timedelta
12
  import pandas as pd
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  class DynamicHighscoresDB:
15
  """Database manager for the Dynamic Highscores system."""
16
 
17
  def __init__(self, db_path="dynamic_highscores.db"):
18
  """Initialize the database connection and create tables if they don't exist."""
19
  self.db_path = db_path
20
- self.conn = None
21
- self.cursor = None
22
- self.connect()
23
  self.create_tables()
24
 
25
- def connect(self):
26
- """Connect to the SQLite database."""
27
- self.conn = sqlite3.connect(self.db_path)
28
- self.conn.row_factory = sqlite3.Row
29
- self.cursor = self.conn.cursor()
 
 
30
 
31
  def close(self):
32
- """Close the database connection."""
33
- if self.conn:
34
- self.conn.close()
35
 
36
  def create_tables(self):
37
  """Create all necessary tables if they don't exist."""
 
 
 
38
  # Users table - stores user information
39
- self.cursor.execute('''
40
  CREATE TABLE IF NOT EXISTS users (
41
  id INTEGER PRIMARY KEY AUTOINCREMENT,
42
  username TEXT UNIQUE NOT NULL,
@@ -48,7 +83,7 @@ class DynamicHighscoresDB:
48
  ''')
49
 
50
  # Benchmarks table - stores information about available benchmarks
51
- self.cursor.execute('''
52
  CREATE TABLE IF NOT EXISTS benchmarks (
53
  id INTEGER PRIMARY KEY AUTOINCREMENT,
54
  name TEXT NOT NULL,
@@ -60,7 +95,7 @@ class DynamicHighscoresDB:
60
  ''')
61
 
62
  # Models table - stores information about submitted models
63
- self.cursor.execute('''
64
  CREATE TABLE IF NOT EXISTS models (
65
  id INTEGER PRIMARY KEY AUTOINCREMENT,
66
  name TEXT NOT NULL,
@@ -76,7 +111,7 @@ class DynamicHighscoresDB:
76
  ''')
77
 
78
  # Evaluations table - stores evaluation results
79
- self.cursor.execute('''
80
  CREATE TABLE IF NOT EXISTS evaluations (
81
  id INTEGER PRIMARY KEY AUTOINCREMENT,
82
  model_id INTEGER NOT NULL,
@@ -85,6 +120,7 @@ class DynamicHighscoresDB:
85
  results TEXT, -- JSON string of results
86
  score REAL, -- Overall score (can be NULL)
87
  submitted_at TEXT DEFAULT CURRENT_TIMESTAMP,
 
88
  completed_at TEXT,
89
  FOREIGN KEY (model_id) REFERENCES models (id),
90
  FOREIGN KEY (benchmark_id) REFERENCES benchmarks (id)
@@ -92,7 +128,7 @@ class DynamicHighscoresDB:
92
  ''')
93
 
94
  # Queue table - stores evaluation queue
95
- self.cursor.execute('''
96
  CREATE TABLE IF NOT EXISTS queue (
97
  id INTEGER PRIMARY KEY AUTOINCREMENT,
98
  evaluation_id INTEGER NOT NULL,
@@ -102,41 +138,50 @@ class DynamicHighscoresDB:
102
  )
103
  ''')
104
 
105
- self.conn.commit()
106
 
107
  # User management methods
108
  def add_user(self, username, hf_user_id, is_admin=False):
109
  """Add a new user to the database."""
 
 
 
110
  try:
111
- self.cursor.execute(
112
  "INSERT INTO users (username, hf_user_id, is_admin) VALUES (?, ?, ?)",
113
  (username, hf_user_id, is_admin)
114
  )
115
- self.conn.commit()
116
- return self.cursor.lastrowid
117
  except sqlite3.IntegrityError:
118
  # User already exists
119
- self.cursor.execute(
120
  "SELECT id FROM users WHERE hf_user_id = ?",
121
  (hf_user_id,)
122
  )
123
- return self.cursor.fetchone()[0]
 
124
 
125
  def get_user(self, hf_user_id):
126
  """Get user information by HuggingFace user ID."""
127
- self.cursor.execute(
 
 
128
  "SELECT * FROM users WHERE hf_user_id = ?",
129
  (hf_user_id,)
130
  )
131
- return dict(self.cursor.fetchone()) if self.cursor.fetchone() else None
 
132
 
133
  def can_submit_today(self, user_id):
134
  """Check if a user can submit a benchmark evaluation today."""
135
- self.cursor.execute(
 
 
136
  "SELECT is_admin, last_submission_date FROM users WHERE id = ?",
137
  (user_id,)
138
  )
139
- result = self.cursor.fetchone()
140
 
141
  if not result:
142
  return False
@@ -159,56 +204,71 @@ class DynamicHighscoresDB:
159
 
160
  def update_submission_date(self, user_id):
161
  """Update the last submission date for a user."""
 
 
 
162
  current_time = datetime.now().isoformat()
163
- self.cursor.execute(
164
  "UPDATE users SET last_submission_date = ? WHERE id = ?",
165
  (current_time, user_id)
166
  )
167
- self.conn.commit()
168
 
169
  # Benchmark management methods
170
  def add_benchmark(self, name, dataset_id, description="", metrics=None):
171
  """Add a new benchmark to the database."""
 
 
 
172
  if metrics is None:
173
  metrics = {}
174
 
175
  metrics_json = json.dumps(metrics)
176
 
177
  try:
178
- self.cursor.execute(
179
  "INSERT INTO benchmarks (name, dataset_id, description, metrics) VALUES (?, ?, ?, ?)",
180
  (name, dataset_id, description, metrics_json)
181
  )
182
- self.conn.commit()
183
- return self.cursor.lastrowid
184
  except sqlite3.IntegrityError:
185
  # Benchmark already exists with this dataset_id
186
- self.cursor.execute(
187
  "SELECT id FROM benchmarks WHERE dataset_id = ?",
188
  (dataset_id,)
189
  )
190
- return self.cursor.fetchone()[0]
 
191
 
192
  def get_benchmarks(self):
193
  """Get all available benchmarks."""
194
- self.cursor.execute("SELECT * FROM benchmarks")
195
- benchmarks = [dict(row) for row in self.cursor.fetchall()]
 
 
196
 
197
  # Parse metrics JSON
198
  for benchmark in benchmarks:
199
- benchmark['metrics'] = json.loads(benchmark['metrics'])
 
 
 
200
 
201
  return benchmarks
202
 
203
  def get_benchmark(self, benchmark_id):
204
  """Get benchmark information by ID."""
205
- self.cursor.execute(
 
 
206
  "SELECT * FROM benchmarks WHERE id = ?",
207
  (benchmark_id,)
208
  )
209
- benchmark = dict(self.cursor.fetchone()) if self.cursor.fetchone() else None
 
210
 
211
- if benchmark:
212
  benchmark['metrics'] = json.loads(benchmark['metrics'])
213
 
214
  return benchmark
@@ -216,62 +276,77 @@ class DynamicHighscoresDB:
216
  # Model management methods
217
  def add_model(self, name, hf_model_id, user_id, tag, parameters=None, description=""):
218
  """Add a new model to the database."""
 
 
 
219
  try:
220
- self.cursor.execute(
221
  "INSERT INTO models (name, hf_model_id, user_id, tag, parameters, description) VALUES (?, ?, ?, ?, ?, ?)",
222
  (name, hf_model_id, user_id, tag, parameters, description)
223
  )
224
- self.conn.commit()
225
- return self.cursor.lastrowid
226
  except sqlite3.IntegrityError:
227
  # Model already exists for this user
228
- self.cursor.execute(
229
  "SELECT id FROM models WHERE hf_model_id = ? AND user_id = ?",
230
  (hf_model_id, user_id)
231
  )
232
- return self.cursor.fetchone()[0]
 
233
 
234
  def get_models(self, tag=None):
235
  """Get all models, optionally filtered by tag."""
236
- if tag:
237
- self.cursor.execute(
 
 
238
  "SELECT * FROM models WHERE tag = ?",
239
  (tag,)
240
  )
241
  else:
242
- self.cursor.execute("SELECT * FROM models")
243
 
244
- return [dict(row) for row in self.cursor.fetchall()]
245
 
246
  def get_model(self, model_id):
247
  """Get model information by ID."""
248
- self.cursor.execute(
 
 
249
  "SELECT * FROM models WHERE id = ?",
250
  (model_id,)
251
  )
252
- return dict(self.cursor.fetchone()) if self.cursor.fetchone() else None
 
253
 
254
  # Evaluation management methods
255
  def add_evaluation(self, model_id, benchmark_id, priority=0):
256
  """Add a new evaluation to the database and queue."""
 
 
 
257
  # First, add the evaluation
258
- self.cursor.execute(
259
  "INSERT INTO evaluations (model_id, benchmark_id, status) VALUES (?, ?, 'pending')",
260
  (model_id, benchmark_id)
261
  )
262
- evaluation_id = self.cursor.lastrowid
263
 
264
  # Then, add it to the queue
265
- self.cursor.execute(
266
  "INSERT INTO queue (evaluation_id, priority) VALUES (?, ?)",
267
  (evaluation_id, priority)
268
  )
269
 
270
- self.conn.commit()
271
  return evaluation_id
272
 
273
  def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
274
  """Update the status of an evaluation."""
 
 
 
275
  params = [status, evaluation_id]
276
  sql = "UPDATE evaluations SET status = ?"
277
 
@@ -284,80 +359,85 @@ class DynamicHighscoresDB:
284
  params.insert(1 if results is None else 2, score)
285
 
286
  if status in ['completed', 'failed']:
287
- sql += ", completed_at = ?"
288
- params.insert(1 if results is None and score is None else (2 if results is None or score is None else 3),
289
- datetime.now().isoformat())
290
 
291
  sql += " WHERE id = ?"
292
 
293
- self.cursor.execute(sql, params)
294
- self.conn.commit()
295
-
296
- # If completed or failed, remove from queue
297
- if status in ['completed', 'failed']:
298
- self.cursor.execute(
299
- "DELETE FROM queue WHERE evaluation_id = ?",
300
- (evaluation_id,)
301
- )
302
- self.conn.commit()
303
 
304
  def get_next_in_queue(self):
305
  """Get the next evaluation in the queue."""
306
- self.cursor.execute("""
307
- SELECT q.id as queue_id, q.evaluation_id, e.model_id, e.benchmark_id, m.hf_model_id, b.dataset_id
 
 
308
  FROM queue q
309
  JOIN evaluations e ON q.evaluation_id = e.id
310
- JOIN models m ON e.model_id = m.id
311
- JOIN benchmarks b ON e.benchmark_id = b.id
312
  WHERE e.status = 'pending'
313
  ORDER BY q.priority DESC, q.added_at ASC
314
  LIMIT 1
315
  """)
316
 
317
- result = self.cursor.fetchone()
318
- return dict(result) if result else None
319
 
320
- def get_evaluation_results(self, model_id=None, benchmark_id=None, tag=None):
321
- """Get evaluation results, optionally filtered by model, benchmark, or tag."""
 
 
322
  sql = """
323
  SELECT e.id, e.model_id, e.benchmark_id, e.status, e.results, e.score,
324
- e.submitted_at, e.completed_at, m.name as model_name, m.tag,
325
  b.name as benchmark_name
326
  FROM evaluations e
327
  JOIN models m ON e.model_id = m.id
328
  JOIN benchmarks b ON e.benchmark_id = b.id
329
- WHERE e.status = 'completed'
330
  """
331
 
332
  params = []
333
 
 
 
 
 
334
  if model_id:
335
  sql += " AND e.model_id = ?"
336
  params.append(model_id)
337
 
338
- if benchmark_id:
339
  sql += " AND e.benchmark_id = ?"
340
  params.append(benchmark_id)
341
 
342
- if tag:
343
  sql += " AND m.tag = ?"
344
  params.append(tag)
345
 
346
- sql += " ORDER BY e.completed_at DESC"
 
 
 
 
347
 
348
- self.cursor.execute(sql, params)
349
- results = [dict(row) for row in self.cursor.fetchall()]
350
 
351
  # Parse results JSON
352
  for result in results:
353
  if result['results']:
354
- result['results'] = json.loads(result['results'])
 
 
 
355
 
356
  return results
357
 
358
- def get_leaderboard_df(self, tag=None):
359
- """Get a pandas DataFrame of the leaderboard, optionally filtered by tag."""
360
- results = self.get_evaluation_results(tag=tag)
361
 
362
  if not results:
363
  return pd.DataFrame()
@@ -368,26 +448,30 @@ class DynamicHighscoresDB:
368
  for result in results:
369
  entry = {
370
  'model_name': result['model_name'],
371
- 'model_id': result['model_id'],
372
- 'benchmark_name': result['benchmark_name'],
373
- 'benchmark_id': result['benchmark_id'],
374
  'tag': result['tag'],
 
375
  'score': result['score'],
376
  'completed_at': result['completed_at']
377
  }
378
 
379
- # Add individual metrics from results
380
  if result['results'] and isinstance(result['results'], dict):
381
- for metric, value in result['results'].items():
382
- if isinstance(value, (int, float)):
383
- entry[f'metric_{metric}'] = value
384
 
385
  leaderboard_data.append(entry)
386
 
387
- return pd.DataFrame(leaderboard_data)
 
 
 
 
 
 
 
388
 
389
- # Initialize the database
390
  def init_db(db_path="dynamic_highscores.db"):
391
- """Initialize the database and return the database manager."""
392
  db = DynamicHighscoresDB(db_path)
393
- return db
 
8
  import sqlite3
9
  import os
10
  import json
11
+ import threading
12
  from datetime import datetime, timedelta
13
  import pandas as pd
14
 
15
class ThreadLocalDB:
    """Thread-local SQLite connection manager.

    Each thread that uses a given manager gets its own connection (and one
    cached cursor) to ``db_path``; the connection is opened lazily on first
    use and rows are returned as ``sqlite3.Row``.
    """

    def __init__(self, db_path):
        """Remember the database path; no connection is opened yet."""
        self.db_path = db_path
        # The storage must be per instance. A class-level threading.local()
        # would be shared by every manager, so a second manager created with
        # a different db_path would silently reuse the first one's connection.
        self._thread_local = threading.local()

    def get_connection(self):
        """Return the calling thread's connection, opening it if needed."""
        conn = getattr(self._thread_local, 'conn', None)
        if conn is None:
            conn = sqlite3.connect(self.db_path)
            conn.row_factory = sqlite3.Row
            self._thread_local.conn = conn
        return conn

    def get_cursor(self):
        """Return the calling thread's cached cursor, creating it if needed."""
        # Always resolve the connection first so it exists before the cursor.
        conn = self.get_connection()
        cursor = getattr(self._thread_local, 'cursor', None)
        if cursor is None:
            cursor = conn.cursor()
            self._thread_local.cursor = cursor
        return cursor

    def close(self):
        """Close the calling thread's cursor and connection, if any."""
        conn = getattr(self._thread_local, 'conn', None)
        if conn is None:
            return

        cursor = getattr(self._thread_local, 'cursor', None)
        if cursor is not None:
            cursor.close()
            self._thread_local.cursor = None

        conn.close()
        self._thread_local.conn = None
46
+
47
  class DynamicHighscoresDB:
48
  """Database manager for the Dynamic Highscores system."""
49
 
50
  def __init__(self, db_path="dynamic_highscores.db"):
51
  """Initialize the database connection and create tables if they don't exist."""
52
  self.db_path = db_path
53
+ self.thread_local_db = ThreadLocalDB(db_path)
 
 
54
  self.create_tables()
55
 
56
def get_conn(self):
    """Return the connection owned by the calling thread."""
    manager = self.thread_local_db
    return manager.get_connection()
59
+
60
def get_cursor(self):
    """Return the cursor owned by the calling thread."""
    manager = self.thread_local_db
    return manager.get_cursor()
63
 
64
  def close(self):
65
+ """Close the thread-local database connection."""
66
+ self.thread_local_db.close()
 
67
 
68
  def create_tables(self):
69
  """Create all necessary tables if they don't exist."""
70
+ cursor = self.get_cursor()
71
+ conn = self.get_conn()
72
+
73
  # Users table - stores user information
74
+ cursor.execute('''
75
  CREATE TABLE IF NOT EXISTS users (
76
  id INTEGER PRIMARY KEY AUTOINCREMENT,
77
  username TEXT UNIQUE NOT NULL,
 
83
  ''')
84
 
85
  # Benchmarks table - stores information about available benchmarks
86
+ cursor.execute('''
87
  CREATE TABLE IF NOT EXISTS benchmarks (
88
  id INTEGER PRIMARY KEY AUTOINCREMENT,
89
  name TEXT NOT NULL,
 
95
  ''')
96
 
97
  # Models table - stores information about submitted models
98
+ cursor.execute('''
99
  CREATE TABLE IF NOT EXISTS models (
100
  id INTEGER PRIMARY KEY AUTOINCREMENT,
101
  name TEXT NOT NULL,
 
111
  ''')
112
 
113
  # Evaluations table - stores evaluation results
114
+ cursor.execute('''
115
  CREATE TABLE IF NOT EXISTS evaluations (
116
  id INTEGER PRIMARY KEY AUTOINCREMENT,
117
  model_id INTEGER NOT NULL,
 
120
  results TEXT, -- JSON string of results
121
  score REAL, -- Overall score (can be NULL)
122
  submitted_at TEXT DEFAULT CURRENT_TIMESTAMP,
123
+ started_at TEXT,
124
  completed_at TEXT,
125
  FOREIGN KEY (model_id) REFERENCES models (id),
126
  FOREIGN KEY (benchmark_id) REFERENCES benchmarks (id)
 
128
  ''')
129
 
130
  # Queue table - stores evaluation queue
131
+ cursor.execute('''
132
  CREATE TABLE IF NOT EXISTS queue (
133
  id INTEGER PRIMARY KEY AUTOINCREMENT,
134
  evaluation_id INTEGER NOT NULL,
 
138
  )
139
  ''')
140
 
141
+ conn.commit()
142
 
143
  # User management methods
144
  def add_user(self, username, hf_user_id, is_admin=False):
145
  """Add a new user to the database."""
146
+ cursor = self.get_cursor()
147
+ conn = self.get_conn()
148
+
149
  try:
150
+ cursor.execute(
151
  "INSERT INTO users (username, hf_user_id, is_admin) VALUES (?, ?, ?)",
152
  (username, hf_user_id, is_admin)
153
  )
154
+ conn.commit()
155
+ return cursor.lastrowid
156
  except sqlite3.IntegrityError:
157
  # User already exists
158
+ cursor.execute(
159
  "SELECT id FROM users WHERE hf_user_id = ?",
160
  (hf_user_id,)
161
  )
162
+ row = cursor.fetchone()
163
+ return row[0] if row else None
164
 
165
  def get_user(self, hf_user_id):
166
  """Get user information by HuggingFace user ID."""
167
+ cursor = self.get_cursor()
168
+
169
+ cursor.execute(
170
  "SELECT * FROM users WHERE hf_user_id = ?",
171
  (hf_user_id,)
172
  )
173
+ row = cursor.fetchone()
174
+ return dict(row) if row else None
175
 
176
  def can_submit_today(self, user_id):
177
  """Check if a user can submit a benchmark evaluation today."""
178
+ cursor = self.get_cursor()
179
+
180
+ cursor.execute(
181
  "SELECT is_admin, last_submission_date FROM users WHERE id = ?",
182
  (user_id,)
183
  )
184
+ result = cursor.fetchone()
185
 
186
  if not result:
187
  return False
 
204
 
205
  def update_submission_date(self, user_id):
206
  """Update the last submission date for a user."""
207
+ cursor = self.get_cursor()
208
+ conn = self.get_conn()
209
+
210
  current_time = datetime.now().isoformat()
211
+ cursor.execute(
212
  "UPDATE users SET last_submission_date = ? WHERE id = ?",
213
  (current_time, user_id)
214
  )
215
+ conn.commit()
216
 
217
  # Benchmark management methods
218
  def add_benchmark(self, name, dataset_id, description="", metrics=None):
219
  """Add a new benchmark to the database."""
220
+ cursor = self.get_cursor()
221
+ conn = self.get_conn()
222
+
223
  if metrics is None:
224
  metrics = {}
225
 
226
  metrics_json = json.dumps(metrics)
227
 
228
  try:
229
+ cursor.execute(
230
  "INSERT INTO benchmarks (name, dataset_id, description, metrics) VALUES (?, ?, ?, ?)",
231
  (name, dataset_id, description, metrics_json)
232
  )
233
+ conn.commit()
234
+ return cursor.lastrowid
235
  except sqlite3.IntegrityError:
236
  # Benchmark already exists with this dataset_id
237
+ cursor.execute(
238
  "SELECT id FROM benchmarks WHERE dataset_id = ?",
239
  (dataset_id,)
240
  )
241
+ row = cursor.fetchone()
242
+ return row[0] if row else None
243
 
244
  def get_benchmarks(self):
245
  """Get all available benchmarks."""
246
+ cursor = self.get_cursor()
247
+
248
+ cursor.execute("SELECT * FROM benchmarks")
249
+ benchmarks = [dict(row) for row in cursor.fetchall()]
250
 
251
  # Parse metrics JSON
252
  for benchmark in benchmarks:
253
+ if benchmark['metrics']:
254
+ benchmark['metrics'] = json.loads(benchmark['metrics'])
255
+ else:
256
+ benchmark['metrics'] = {}
257
 
258
  return benchmarks
259
 
260
  def get_benchmark(self, benchmark_id):
261
  """Get benchmark information by ID."""
262
+ cursor = self.get_cursor()
263
+
264
+ cursor.execute(
265
  "SELECT * FROM benchmarks WHERE id = ?",
266
  (benchmark_id,)
267
  )
268
+ row = cursor.fetchone()
269
+ benchmark = dict(row) if row else None
270
 
271
+ if benchmark and benchmark['metrics']:
272
  benchmark['metrics'] = json.loads(benchmark['metrics'])
273
 
274
  return benchmark
 
276
  # Model management methods
277
  def add_model(self, name, hf_model_id, user_id, tag, parameters=None, description=""):
278
  """Add a new model to the database."""
279
+ cursor = self.get_cursor()
280
+ conn = self.get_conn()
281
+
282
  try:
283
+ cursor.execute(
284
  "INSERT INTO models (name, hf_model_id, user_id, tag, parameters, description) VALUES (?, ?, ?, ?, ?, ?)",
285
  (name, hf_model_id, user_id, tag, parameters, description)
286
  )
287
+ conn.commit()
288
+ return cursor.lastrowid
289
  except sqlite3.IntegrityError:
290
  # Model already exists for this user
291
+ cursor.execute(
292
  "SELECT id FROM models WHERE hf_model_id = ? AND user_id = ?",
293
  (hf_model_id, user_id)
294
  )
295
+ row = cursor.fetchone()
296
+ return row[0] if row else None
297
 
298
  def get_models(self, tag=None):
299
  """Get all models, optionally filtered by tag."""
300
+ cursor = self.get_cursor()
301
+
302
+ if tag and tag.lower() != "all":
303
+ cursor.execute(
304
  "SELECT * FROM models WHERE tag = ?",
305
  (tag,)
306
  )
307
  else:
308
+ cursor.execute("SELECT * FROM models")
309
 
310
+ return [dict(row) for row in cursor.fetchall()]
311
 
312
  def get_model(self, model_id):
313
  """Get model information by ID."""
314
+ cursor = self.get_cursor()
315
+
316
+ cursor.execute(
317
  "SELECT * FROM models WHERE id = ?",
318
  (model_id,)
319
  )
320
+ row = cursor.fetchone()
321
+ return dict(row) if row else None
322
 
323
  # Evaluation management methods
324
  def add_evaluation(self, model_id, benchmark_id, priority=0):
325
  """Add a new evaluation to the database and queue."""
326
+ cursor = self.get_cursor()
327
+ conn = self.get_conn()
328
+
329
  # First, add the evaluation
330
+ cursor.execute(
331
  "INSERT INTO evaluations (model_id, benchmark_id, status) VALUES (?, ?, 'pending')",
332
  (model_id, benchmark_id)
333
  )
334
+ evaluation_id = cursor.lastrowid
335
 
336
  # Then, add it to the queue
337
+ cursor.execute(
338
  "INSERT INTO queue (evaluation_id, priority) VALUES (?, ?)",
339
  (evaluation_id, priority)
340
  )
341
 
342
+ conn.commit()
343
  return evaluation_id
344
 
345
  def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
346
  """Update the status of an evaluation."""
347
+ cursor = self.get_cursor()
348
+ conn = self.get_conn()
349
+
350
  params = [status, evaluation_id]
351
  sql = "UPDATE evaluations SET status = ?"
352
 
 
359
  params.insert(1 if results is None else 2, score)
360
 
361
  if status in ['completed', 'failed']:
362
+ sql += ", completed_at = datetime('now')"
363
+ elif status == 'running':
364
+ sql += ", started_at = datetime('now')"
365
 
366
  sql += " WHERE id = ?"
367
 
368
+ cursor.execute(sql, params)
369
+ conn.commit()
 
 
 
 
 
 
 
 
370
 
371
  def get_next_in_queue(self):
372
  """Get the next evaluation in the queue."""
373
+ cursor = self.get_cursor()
374
+
375
+ cursor.execute("""
376
+ SELECT q.*, e.id as evaluation_id, e.model_id, e.benchmark_id, e.status
377
  FROM queue q
378
  JOIN evaluations e ON q.evaluation_id = e.id
 
 
379
  WHERE e.status = 'pending'
380
  ORDER BY q.priority DESC, q.added_at ASC
381
  LIMIT 1
382
  """)
383
 
384
+ row = cursor.fetchone()
385
+ return dict(row) if row else None
386
 
387
def get_evaluation_results(self, model_id=None, benchmark_id=None, tag=None, status=None, limit=None):
    """Return evaluation rows joined with model and benchmark names.

    Rows are ordered by ``submitted_at``, newest first, and each is a dict.
    The ``results`` column is decoded from JSON; a value that cannot be
    decoded becomes ``{}``, and an empty or NULL value is left untouched.

    Args:
        model_id: Keep only this model's evaluations (ignored when falsy).
        benchmark_id: Keep only this benchmark; may be an integer id or a
            string. Falsy values and the case-insensitive string "all"
            disable the filter.
        tag: Keep only models with this tag; falsy or "all" disables it.
        status: Keep only evaluations with this status (ignored when falsy).
        limit: Maximum number of rows (ignored when falsy).
    """
    def _is_active_filter(value):
        """True unless the value is empty or the 'all' sentinel string."""
        if not value:
            return False
        # Only strings can be the sentinel; integer ids have no .lower().
        return not (isinstance(value, str) and value.lower() == "all")

    cursor = self.get_cursor()

    sql = """
        SELECT e.id, e.model_id, e.benchmark_id, e.status, e.results, e.score,
               e.submitted_at, e.started_at, e.completed_at, m.name as model_name, m.tag,
               b.name as benchmark_name
        FROM evaluations e
        JOIN models m ON e.model_id = m.id
        JOIN benchmarks b ON e.benchmark_id = b.id
        WHERE 1=1
    """

    params = []

    if status:
        sql += " AND e.status = ?"
        params.append(status)

    if model_id:
        sql += " AND e.model_id = ?"
        params.append(model_id)

    if _is_active_filter(benchmark_id):
        sql += " AND e.benchmark_id = ?"
        params.append(benchmark_id)

    if _is_active_filter(tag):
        sql += " AND m.tag = ?"
        params.append(tag)

    sql += " ORDER BY e.submitted_at DESC"

    if limit:
        sql += " LIMIT ?"
        params.append(limit)

    cursor.execute(sql, params)
    results = [dict(row) for row in cursor.fetchall()]

    # Parse results JSON
    for result in results:
        if result['results']:
            try:
                result['results'] = json.loads(result['results'])
            except (ValueError, TypeError):
                # Malformed payload: degrade to an empty mapping rather than
                # failing the whole listing. json.JSONDecodeError is a ValueError.
                result['results'] = {}

    return results
437
 
438
+ def get_leaderboard_df(self, tag=None, benchmark_id=None):
439
+ """Get a pandas DataFrame of the leaderboard, optionally filtered by tag and benchmark."""
440
+ results = self.get_evaluation_results(tag=tag, benchmark_id=benchmark_id, status="completed")
441
 
442
  if not results:
443
  return pd.DataFrame()
 
448
  for result in results:
449
  entry = {
450
  'model_name': result['model_name'],
 
 
 
451
  'tag': result['tag'],
452
+ 'benchmark_name': result['benchmark_name'],
453
  'score': result['score'],
454
  'completed_at': result['completed_at']
455
  }
456
 
457
+ # Add any additional metrics from results
458
  if result['results'] and isinstance(result['results'], dict):
459
+ for key, value in result['results'].items():
460
+ if isinstance(value, (int, float)) and key not in entry:
461
+ entry[key] = value
462
 
463
  leaderboard_data.append(entry)
464
 
465
+ # Convert to DataFrame
466
+ df = pd.DataFrame(leaderboard_data)
467
+
468
+ # Sort by score (descending)
469
+ if not df.empty and 'score' in df.columns:
470
+ df = df.sort_values('score', ascending=False)
471
+
472
+ return df
473
 
 
474
  def init_db(db_path="dynamic_highscores.db"):
475
+ """Initialize and return the database manager."""
476
  db = DynamicHighscoresDB(db_path)
477
+ return db