"""
Test script for Dynamic Highscores application.

This script tests the key functionality of the Dynamic Highscores application
to ensure everything works as expected before deployment.
"""

import os
import sqlite3
import tempfile
import unittest
from unittest.mock import MagicMock, patch

from database_schema import DynamicHighscoresDB
from auth import HuggingFaceAuth
from benchmark_selection import BenchmarkSelector
from evaluation_queue import EvaluationQueue
from leaderboard import Leaderboard
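
# MagicMock and patch are available for stubbing out network-bound
# HuggingFace calls. A minimal sketch — the "login" method name below is
# hypothetical, purely for illustration:
#
#     with patch.object(HuggingFaceAuth, "login", MagicMock(return_value=True)):
#         ...  # exercise code paths that would otherwise hit the HF API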


class TestDynamicHighscores(unittest.TestCase):
    """Test cases for Dynamic Highscores application."""

    def setUp(self):
        """Set up test environment."""
        # Use a temporary database file so tests never touch real data.
        self.db_fd, self.db_path = tempfile.mkstemp()
        self.db = DynamicHighscoresDB(self.db_path)

        # Wire up the application components against the test database.
        self.auth_manager = HuggingFaceAuth(self.db)
        self.benchmark_selector = BenchmarkSelector(self.db, self.auth_manager)
        self.evaluation_queue = EvaluationQueue(self.db, self.auth_manager)
        self.leaderboard = Leaderboard(self.db)

    def tearDown(self):
        """Clean up test environment."""
        # Close and delete the temporary database file.
        os.close(self.db_fd)
        os.unlink(self.db_path)

    def test_database_schema(self):
        """Test database schema creation."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Collect the names of all tables in the database.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        table_names = [table[0] for table in tables]

        # Every core table should have been created.
        expected_tables = ['users', 'benchmarks', 'models', 'evaluations', 'queue']
        for table in expected_tables:
            self.assertIn(table, table_names)

        conn.close()

    def test_user_management(self):
        """Test user management functionality."""
        # Add a regular user and an admin user.
        user_id = self.db.add_user("test_user", "test_hf_id", False)
        self.assertIsNotNone(user_id)

        admin_id = self.db.add_user("admin_user", "admin_hf_id", True)
        self.assertIsNotNone(admin_id)

        # A regular user may submit once per day; a second attempt the same
        # day is rejected.
        self.assertTrue(self.db.can_submit_today(user_id))
        self.db.update_submission_date(user_id)
        self.assertFalse(self.db.can_submit_today(user_id))

        # The admin user has made no submission today and can still submit.
        self.assertTrue(self.db.can_submit_today(admin_id))

    def test_benchmark_management(self):
        """Test benchmark management functionality."""
        # Add a benchmark backed by a HuggingFace dataset.
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
            description="Test description",
            metrics={"accuracy": 1.0}
        )
        self.assertIsNotNone(benchmark_id)

        # The new benchmark should be the only one returned.
        benchmarks = self.db.get_benchmarks()
        self.assertEqual(len(benchmarks), 1)
        self.assertEqual(benchmarks[0]["name"], "Test Benchmark")

    def test_model_management(self):
        """Test model management functionality."""
        # Models must belong to a registered user.
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning",
            parameters="7B",
            description="Test model description"
        )
        self.assertIsNotNone(model_id)

        # Fetching without a filter returns the single registered model.
        models = self.db.get_models()
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["name"], "Test Model")

        # Filtering by tag returns the same model.
        models = self.db.get_models(tag="Reasoning")
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["tag"], "Reasoning")

    def test_evaluation_management(self):
        """Test evaluation management functionality."""
        # Create the user, model, and benchmark that an evaluation ties together.
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning"
        )

        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        evaluation_id = self.db.add_evaluation(
            model_id=model_id,
            benchmark_id=benchmark_id
        )
        self.assertIsNotNone(evaluation_id)

        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="running"
        )

        # The evaluation should be at the head of the processing queue.
        next_eval = self.db.get_next_in_queue()
        self.assertIsNotNone(next_eval)
        self.assertEqual(next_eval["evaluation_id"], evaluation_id)

        # Mark the evaluation as completed with its results and score.
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="completed",
            results={"accuracy": 0.85},
            score=85.0
        )

        results = self.db.get_evaluation_results()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["score"], 85.0)

    def test_leaderboard(self):
        """Test leaderboard functionality."""
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Two models with different tags, evaluated on the same benchmark.
        model1_id = self.db.add_model(
            name="Model 1",
            hf_model_id="test/model1",
            user_id=user_id,
            tag="Reasoning"
        )

        model2_id = self.db.add_model(
            name="Model 2",
            hf_model_id="test/model2",
            user_id=user_id,
            tag="Coding"
        )

        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        eval1_id = self.db.add_evaluation(
            model_id=model1_id,
            benchmark_id=benchmark_id
        )

        eval2_id = self.db.add_evaluation(
            model_id=model2_id,
            benchmark_id=benchmark_id
        )

        self.db.update_evaluation_status(
            evaluation_id=eval1_id,
            status="completed",
            results={"accuracy": 0.9},
            score=90.0
        )

        self.db.update_evaluation_status(
            evaluation_id=eval2_id,
            status="completed",
            results={"accuracy": 0.8},
            score=80.0
        )

        # The unfiltered leaderboard should contain both models.
        df = self.leaderboard.get_leaderboard_data()
        self.assertEqual(len(df), 2)

        # Filtering by tag should return only the matching model and its score.
        df_reasoning = self.leaderboard.get_leaderboard_data(tag="Reasoning")
        self.assertEqual(len(df_reasoning), 1)
        self.assertEqual(df_reasoning.iloc[0]["score"], 90.0)

        df_coding = self.leaderboard.get_leaderboard_data(tag="Coding")
        self.assertEqual(len(df_coding), 1)
        self.assertEqual(df_coding.iloc[0]["score"], 80.0)


if __name__ == "__main__":
    unittest.main()