Quazim0t0 committed on
Commit
b9e27dc
·
verified ·
1 Parent(s): e3d539f

Delete database_schema.py

Browse files
Files changed (1) hide show
  1. database_schema.py +0 -393
database_schema.py DELETED
@@ -1,393 +0,0 @@
1
- """
2
- Database schema for Dynamic Highscores system.
3
-
4
- This module defines the SQLite database schema for the Dynamic Highscores system,
5
- which integrates benchmark selection, model evaluation, and leaderboard functionality.
6
- """
7
-
8
- import sqlite3
9
- import os
10
- import json
11
- from datetime import datetime, timedelta
12
- import pandas as pd
13
-
14
class DynamicHighscoresDB:
    """Database manager for the Dynamic Highscores system.

    Owns one SQLite connection and one shared cursor. Rows are produced
    through ``sqlite3.Row`` and converted to plain ``dict`` objects before
    being returned to callers. JSON-valued columns (``benchmarks.metrics``
    and ``evaluations.results``) are stored as text and decoded on read.
    """

    def __init__(self, db_path="dynamic_highscores.db"):
        """Open the database at *db_path* and create any missing tables.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
        """
        self.db_path = db_path
        self.conn = None
        self.cursor = None
        self.connect()
        self.create_tables()

    def connect(self):
        """Open the SQLite connection and create the shared cursor."""
        self.conn = sqlite3.connect(self.db_path)
        # sqlite3.Row allows access by column name and conversion to dict.
        self.conn.row_factory = sqlite3.Row
        self.cursor = self.conn.cursor()

    def close(self):
        """Close the database connection if one was opened."""
        if self.conn:
            self.conn.close()

    # Internal helpers
    def _fetchone_dict(self):
        """Return the next row of the current result set as a dict, or None.

        ``fetchone()`` advances the cursor, so it must be called exactly once
        per row; calling it once to test and again to read skips the row.
        """
        row = self.cursor.fetchone()
        return dict(row) if row is not None else None

    def _existing_id_or_reraise(self, sql, params):
        """Resolve the id of an already-stored row after a failed INSERT.

        Must be called from inside an ``except sqlite3.IntegrityError`` block.
        If the lookup finds no row, the constraint failure was not the
        expected duplicate, so the original ``IntegrityError`` is re-raised
        instead of failing with an unrelated ``TypeError``.
        """
        # Discard the implicit transaction opened by the failed INSERT so it
        # is not left pending on the connection.
        self.conn.rollback()
        self.cursor.execute(sql, params)
        row = self.cursor.fetchone()
        if row is None:
            raise
        return row[0]

    @staticmethod
    def _decode_metrics(benchmark):
        """Decode the ``metrics`` JSON text of a benchmark dict in place.

        A NULL/empty ``metrics`` value is left untouched rather than being
        passed to ``json.loads``.
        """
        if benchmark['metrics']:
            benchmark['metrics'] = json.loads(benchmark['metrics'])
        return benchmark

    def create_tables(self):
        """Create all necessary tables if they don't exist."""
        # Users table - stores user information
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                hf_user_id TEXT UNIQUE NOT NULL,
                is_admin BOOLEAN DEFAULT 0,
                last_submission_date TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Benchmarks table - stores information about available benchmarks
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS benchmarks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                dataset_id TEXT NOT NULL,
                description TEXT,
                metrics TEXT,  -- JSON string of metrics
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Models table - stores information about submitted models
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                hf_model_id TEXT NOT NULL,
                user_id INTEGER NOT NULL,
                tag TEXT NOT NULL,  -- One of: Merge, Agent, Reasoning, Coding, etc.
                parameters TEXT,  -- Number of parameters (can be NULL)
                description TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users (id),
                UNIQUE (hf_model_id, user_id)
            )
        ''')

        # Evaluations table - stores evaluation results
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS evaluations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_id INTEGER NOT NULL,
                benchmark_id INTEGER NOT NULL,
                status TEXT NOT NULL,  -- pending, running, completed, failed
                results TEXT,  -- JSON string of results
                score REAL,  -- Overall score (can be NULL)
                submitted_at TEXT DEFAULT CURRENT_TIMESTAMP,
                completed_at TEXT,
                FOREIGN KEY (model_id) REFERENCES models (id),
                FOREIGN KEY (benchmark_id) REFERENCES benchmarks (id)
            )
        ''')

        # Queue table - stores evaluation queue
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS queue (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                evaluation_id INTEGER NOT NULL,
                priority INTEGER DEFAULT 0,  -- Higher number = higher priority
                added_at TEXT DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (evaluation_id) REFERENCES evaluations (id)
            )
        ''')

        self.conn.commit()

    # User management methods
    def add_user(self, username, hf_user_id, is_admin=False):
        """Add a new user, or return the id of the existing one.

        Args:
            username: Display name; unique in the ``users`` table.
            hf_user_id: HuggingFace user id; unique in the ``users`` table.
            is_admin: Whether the user is flagged as an admin.

        Returns:
            The id of the inserted row, or of the existing row with the same
            ``hf_user_id`` when the insert violates a constraint.

        Raises:
            sqlite3.IntegrityError: If the insert fails and no user with
                ``hf_user_id`` exists (e.g. the username is already taken by
                a different HuggingFace account).
        """
        try:
            self.cursor.execute(
                "INSERT INTO users (username, hf_user_id, is_admin) VALUES (?, ?, ?)",
                (username, hf_user_id, is_admin)
            )
            self.conn.commit()
            return self.cursor.lastrowid
        except sqlite3.IntegrityError:
            # Most likely the user already exists; look it up by HF id.
            return self._existing_id_or_reraise(
                "SELECT id FROM users WHERE hf_user_id = ?",
                (hf_user_id,)
            )

    def get_user(self, hf_user_id):
        """Get user information by HuggingFace user ID.

        Returns:
            A dict of the user's columns, or None if no such user exists.
        """
        self.cursor.execute(
            "SELECT * FROM users WHERE hf_user_id = ?",
            (hf_user_id,)
        )
        return self._fetchone_dict()

    def can_submit_today(self, user_id):
        """Check if a user can submit a benchmark evaluation today.

        Returns:
            False for an unknown user; True for admins and for users with no
            recorded submission; otherwise True only when the stored
            ``last_submission_date`` is earlier than midnight of the current
            day as given by ``datetime.now()``.
        """
        self.cursor.execute(
            "SELECT is_admin, last_submission_date FROM users WHERE id = ?",
            (user_id,)
        )
        user_data = self._fetchone_dict()

        if user_data is None:
            return False

        # Admin can always submit
        if user_data['is_admin']:
            return True

        # If no previous submission, user can submit
        if not user_data['last_submission_date']:
            return True

        # Check if last submission was before today
        last_date = datetime.fromisoformat(user_data['last_submission_date'])
        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

        return last_date < today

    def update_submission_date(self, user_id):
        """Set the user's last submission date to the current time (ISO format)."""
        current_time = datetime.now().isoformat()
        self.cursor.execute(
            "UPDATE users SET last_submission_date = ? WHERE id = ?",
            (current_time, user_id)
        )
        self.conn.commit()

    # Benchmark management methods
    def add_benchmark(self, name, dataset_id, description="", metrics=None):
        """Add a new benchmark to the database.

        Args:
            name: Benchmark name.
            dataset_id: Identifier of the dataset backing the benchmark.
            description: Optional free-text description.
            metrics: JSON-serializable metrics description; defaults to ``{}``.

        Returns:
            The id of the inserted row, or of an existing benchmark with the
            same ``dataset_id`` when the insert violates a constraint.

        Raises:
            sqlite3.IntegrityError: If the insert fails and no benchmark with
                ``dataset_id`` exists.
        """
        if metrics is None:
            metrics = {}

        metrics_json = json.dumps(metrics)

        try:
            self.cursor.execute(
                "INSERT INTO benchmarks (name, dataset_id, description, metrics) VALUES (?, ?, ?, ?)",
                (name, dataset_id, description, metrics_json)
            )
            self.conn.commit()
            return self.cursor.lastrowid
        except sqlite3.IntegrityError:
            return self._existing_id_or_reraise(
                "SELECT id FROM benchmarks WHERE dataset_id = ?",
                (dataset_id,)
            )

    def get_benchmarks(self):
        """Get all available benchmarks as dicts with ``metrics`` decoded."""
        self.cursor.execute("SELECT * FROM benchmarks")
        return [self._decode_metrics(dict(row)) for row in self.cursor.fetchall()]

    def get_benchmark(self, benchmark_id):
        """Get benchmark information by ID.

        Returns:
            A dict of the benchmark's columns with ``metrics`` decoded from
            JSON, or None if no such benchmark exists.
        """
        self.cursor.execute(
            "SELECT * FROM benchmarks WHERE id = ?",
            (benchmark_id,)
        )
        benchmark = self._fetchone_dict()

        if benchmark:
            self._decode_metrics(benchmark)

        return benchmark

    # Model management methods
    def add_model(self, name, hf_model_id, user_id, tag, parameters=None, description=""):
        """Add a new model, or return the id of the user's existing one.

        Returns:
            The id of the inserted row, or of the existing row with the same
            ``(hf_model_id, user_id)`` pair when the insert violates a
            constraint.

        Raises:
            sqlite3.IntegrityError: If the insert fails and no model with
                that ``(hf_model_id, user_id)`` pair exists.
        """
        try:
            self.cursor.execute(
                "INSERT INTO models (name, hf_model_id, user_id, tag, parameters, description) VALUES (?, ?, ?, ?, ?, ?)",
                (name, hf_model_id, user_id, tag, parameters, description)
            )
            self.conn.commit()
            return self.cursor.lastrowid
        except sqlite3.IntegrityError:
            # Most likely the model already exists for this user.
            return self._existing_id_or_reraise(
                "SELECT id FROM models WHERE hf_model_id = ? AND user_id = ?",
                (hf_model_id, user_id)
            )

    def get_models(self, tag=None):
        """Get all models, optionally filtered by tag."""
        if tag:
            self.cursor.execute(
                "SELECT * FROM models WHERE tag = ?",
                (tag,)
            )
        else:
            self.cursor.execute("SELECT * FROM models")

        return [dict(row) for row in self.cursor.fetchall()]

    def get_model(self, model_id):
        """Get model information by ID.

        Returns:
            A dict of the model's columns, or None if no such model exists.
        """
        self.cursor.execute(
            "SELECT * FROM models WHERE id = ?",
            (model_id,)
        )
        return self._fetchone_dict()

    # Evaluation management methods
    def add_evaluation(self, model_id, benchmark_id, priority=0):
        """Add a new pending evaluation and enqueue it.

        Both inserts run in one transaction, so a failure while enqueueing
        does not leave an evaluation row without a queue entry.

        Returns:
            The id of the new evaluation row.
        """
        # `with self.conn` commits on success and rolls back on an exception.
        with self.conn:
            # First, add the evaluation
            self.cursor.execute(
                "INSERT INTO evaluations (model_id, benchmark_id, status) VALUES (?, ?, 'pending')",
                (model_id, benchmark_id)
            )
            evaluation_id = self.cursor.lastrowid

            # Then, add it to the queue
            self.cursor.execute(
                "INSERT INTO queue (evaluation_id, priority) VALUES (?, ?)",
                (evaluation_id, priority)
            )

        return evaluation_id

    def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
        """Update the status of an evaluation.

        Args:
            evaluation_id: Id of the evaluation row to update.
            status: New status string.
            results: Optional JSON-serializable results; stored only when
                not None.
            score: Optional overall score; stored only when not None.

        When *status* is ``'completed'`` or ``'failed'`` the completion time
        is recorded and the evaluation is removed from the queue, in the same
        transaction as the status update.
        """
        # Build the SET clause and its parameters side by side so the
        # placeholder order always matches the parameter order.
        assignments = ["status = ?"]
        params = [status]

        if results is not None:
            assignments.append("results = ?")
            params.append(json.dumps(results))

        if score is not None:
            assignments.append("score = ?")
            params.append(score)

        finished = status in ('completed', 'failed')
        if finished:
            assignments.append("completed_at = ?")
            params.append(datetime.now().isoformat())

        params.append(evaluation_id)
        # Only fixed column fragments are interpolated; values stay bound.
        sql = f"UPDATE evaluations SET {', '.join(assignments)} WHERE id = ?"

        with self.conn:
            self.cursor.execute(sql, params)

            # If completed or failed, remove from queue
            if finished:
                self.cursor.execute(
                    "DELETE FROM queue WHERE evaluation_id = ?",
                    (evaluation_id,)
                )

    def get_next_in_queue(self):
        """Get the next pending evaluation in the queue.

        Entries are ordered by descending priority, then by the time they
        were added (oldest first).

        Returns:
            A dict with ``queue_id``, ``evaluation_id``, ``model_id``,
            ``benchmark_id``, ``hf_model_id`` and ``dataset_id``, or None
            when no pending evaluation is queued.
        """
        self.cursor.execute("""
            SELECT q.id as queue_id, q.evaluation_id, e.model_id, e.benchmark_id, m.hf_model_id, b.dataset_id
            FROM queue q
            JOIN evaluations e ON q.evaluation_id = e.id
            JOIN models m ON e.model_id = m.id
            JOIN benchmarks b ON e.benchmark_id = b.id
            WHERE e.status = 'pending'
            ORDER BY q.priority DESC, q.added_at ASC
            LIMIT 1
        """)

        return self._fetchone_dict()

    def get_evaluation_results(self, model_id=None, benchmark_id=None, tag=None):
        """Get completed evaluation results, newest first.

        Args:
            model_id: If truthy, restrict to this model.
            benchmark_id: If truthy, restrict to this benchmark.
            tag: If truthy, restrict to models carrying this tag.

        Returns:
            A list of dicts; each ``results`` value is decoded from JSON when
            present.
        """
        sql = """
            SELECT e.id, e.model_id, e.benchmark_id, e.status, e.results, e.score,
                   e.submitted_at, e.completed_at, m.name as model_name, m.tag,
                   b.name as benchmark_name
            FROM evaluations e
            JOIN models m ON e.model_id = m.id
            JOIN benchmarks b ON e.benchmark_id = b.id
            WHERE e.status = 'completed'
        """

        params = []

        if model_id:
            sql += " AND e.model_id = ?"
            params.append(model_id)

        if benchmark_id:
            sql += " AND e.benchmark_id = ?"
            params.append(benchmark_id)

        if tag:
            sql += " AND m.tag = ?"
            params.append(tag)

        sql += " ORDER BY e.completed_at DESC"

        self.cursor.execute(sql, params)
        results = [dict(row) for row in self.cursor.fetchall()]

        # Parse results JSON
        for result in results:
            if result['results']:
                result['results'] = json.loads(result['results'])

        return results

    def get_leaderboard_df(self, tag=None):
        """Get a pandas DataFrame of the leaderboard, optionally filtered by tag.

        Each completed evaluation becomes one row. Numeric entries of the
        decoded ``results`` dict are added as ``metric_<name>`` columns.

        Returns:
            A DataFrame of leaderboard rows, or an empty DataFrame when there
            are no completed evaluations.
        """
        results = self.get_evaluation_results(tag=tag)

        if not results:
            return pd.DataFrame()

        # Create a list of dictionaries for the DataFrame
        leaderboard_data = []

        for result in results:
            entry = {
                'model_name': result['model_name'],
                'model_id': result['model_id'],
                'benchmark_name': result['benchmark_name'],
                'benchmark_id': result['benchmark_id'],
                'tag': result['tag'],
                'score': result['score'],
                'completed_at': result['completed_at']
            }

            # Add individual metrics from results
            if result['results'] and isinstance(result['results'], dict):
                for metric, value in result['results'].items():
                    if isinstance(value, (int, float)):
                        entry[f'metric_{metric}'] = value

            leaderboard_data.append(entry)

        return pd.DataFrame(leaderboard_data)
388
-
389
- # Initialize the database
390
def init_db(db_path="dynamic_highscores.db"):
    """Build a DynamicHighscoresDB for *db_path* and return it.

    Constructing the manager opens the connection and creates any missing
    tables, so the returned object is ready for use.
    """
    return DynamicHighscoresDB(db_path)