"""
Test script for Dynamic Highscores application.

This script tests the key functionality of the Dynamic Highscores
application to ensure everything works as expected before deployment.
"""

import os
import unittest
import tempfile
import sqlite3
from unittest.mock import MagicMock, patch

# Import components to test
from database_schema import DynamicHighscoresDB
from auth import HuggingFaceAuth
from benchmark_selection import BenchmarkSelector
from evaluation_queue import EvaluationQueue
from leaderboard import Leaderboard


class TestDynamicHighscores(unittest.TestCase):
    """Test cases for Dynamic Highscores application.

    Every test runs against its own temporary SQLite file, created in
    ``setUp`` and removed by a registered cleanup once the test finishes.
    """

    def setUp(self):
        """Set up test environment.

        Creates a temporary database file and builds the database wrapper,
        auth manager, benchmark selector, evaluation queue and leaderboard
        on top of it.
        """
        # Create temporary database
        self.db_fd, self.db_path = tempfile.mkstemp()
        # Register removal immediately: unittest does not call tearDown()
        # when setUp() raises, but it always runs registered cleanups, so
        # the temp file cannot leak if a constructor below fails.
        self.addCleanup(self._remove_temp_db)

        self.db = DynamicHighscoresDB(self.db_path)

        # Real auth manager backed by the temporary database (not a mock)
        self.auth_manager = HuggingFaceAuth(self.db)

        # Real components wired to the temporary database
        self.benchmark_selector = BenchmarkSelector(self.db, self.auth_manager)
        self.evaluation_queue = EvaluationQueue(self.db, self.auth_manager)
        self.leaderboard = Leaderboard(self.db)

    def _remove_temp_db(self):
        """Close the temp-file descriptor and delete the database file."""
        os.close(self.db_fd)
        os.unlink(self.db_path)

    def test_database_schema(self):
        """Test database schema creation.

        Opens the database file directly with ``sqlite3`` and checks that
        each expected table name is listed in ``sqlite_master``.
        """
        conn = sqlite3.connect(self.db_path)
        # Close the connection even when an assertion below fails. Cleanups
        # run last-in-first-out, so this happens before the file is removed.
        self.addCleanup(conn.close)

        # Get list of tables
        cursor = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table';"
        )
        table_names = {row[0] for row in cursor.fetchall()}

        # Check if all expected tables exist
        expected_tables = ['users', 'benchmarks', 'models', 'evaluations', 'queue']
        for table in expected_tables:
            # subTest reports every missing table, not just the first one.
            with self.subTest(table=table):
                self.assertIn(table, table_names)

    def test_user_management(self):
        """Test user management functionality.

        Adds a regular user and an admin user, then checks that the regular
        user can submit before ``update_submission_date`` is called and
        cannot afterwards, while the admin is reported as able to submit.
        """
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)
        self.assertIsNotNone(user_id)

        # Add an admin user
        admin_id = self.db.add_user("admin_user", "admin_hf_id", True)
        self.assertIsNotNone(admin_id)

        # Test submission limits
        self.assertTrue(self.db.can_submit_today(user_id))
        self.db.update_submission_date(user_id)
        self.assertFalse(self.db.can_submit_today(user_id))

        # Admin should always be able to submit
        self.assertTrue(self.db.can_submit_today(admin_id))

    def test_benchmark_management(self):
        """Test benchmark management functionality.

        Adds one benchmark and checks that ``get_benchmarks`` returns
        exactly that benchmark.
        """
        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
            description="Test description",
            metrics={"accuracy": 1.0},
        )
        self.assertIsNotNone(benchmark_id)

        # Get benchmarks
        benchmarks = self.db.get_benchmarks()
        self.assertEqual(len(benchmarks), 1)
        self.assertEqual(benchmarks[0]["name"], "Test Benchmark")

    def test_model_management(self):
        """Test model management functionality.

        Adds one model owned by a test user and checks that it is returned
        both by an unfiltered ``get_models`` call and when filtering by
        its tag.
        """
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning",
            parameters="7B",
            description="Test model description",
        )
        self.assertIsNotNone(model_id)

        # Get models
        models = self.db.get_models()
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["name"], "Test Model")

        # Get models by tag
        models = self.db.get_models(tag="Reasoning")
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["tag"], "Reasoning")

    def test_evaluation_management(self):
        """Test evaluation management functionality.

        Creates a user, model and benchmark, adds an evaluation for them,
        moves it through the "running" and "completed" statuses, and checks
        the queue lookup and the stored score.
        """
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning",
        )

        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
        )

        # Add a test evaluation
        evaluation_id = self.db.add_evaluation(
            model_id=model_id,
            benchmark_id=benchmark_id,
        )
        self.assertIsNotNone(evaluation_id)

        # Update evaluation status
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="running",
        )

        # Get next in queue
        # NOTE(review): the evaluation is marked "running" before the queue
        # is polled, so this only passes if get_next_in_queue() still returns
        # running evaluations - confirm against DynamicHighscoresDB.
        next_eval = self.db.get_next_in_queue()
        self.assertIsNotNone(next_eval)
        self.assertEqual(next_eval["evaluation_id"], evaluation_id)

        # Complete evaluation
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="completed",
            results={"accuracy": 0.85},
            score=85.0,
        )

        # Get evaluation results
        results = self.db.get_evaluation_results()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["score"], 85.0)

    def test_leaderboard(self):
        """Test leaderboard functionality.

        Completes one evaluation for a "Reasoning" model and one for a
        "Coding" model, then checks that the unfiltered leaderboard has two
        rows and that each tag filter returns the single matching score.
        """
        # Add test data
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add models with different tags
        model1_id = self.db.add_model(
            name="Model 1",
            hf_model_id="test/model1",
            user_id=user_id,
            tag="Reasoning",
        )
        model2_id = self.db.add_model(
            name="Model 2",
            hf_model_id="test/model2",
            user_id=user_id,
            tag="Coding",
        )

        # Add a benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
        )

        # Add evaluations
        eval1_id = self.db.add_evaluation(
            model_id=model1_id,
            benchmark_id=benchmark_id,
        )
        eval2_id = self.db.add_evaluation(
            model_id=model2_id,
            benchmark_id=benchmark_id,
        )

        # Complete evaluations
        self.db.update_evaluation_status(
            evaluation_id=eval1_id,
            status="completed",
            results={"accuracy": 0.9},
            score=90.0,
        )
        self.db.update_evaluation_status(
            evaluation_id=eval2_id,
            status="completed",
            results={"accuracy": 0.8},
            score=80.0,
        )

        # Get leaderboard data
        df = self.leaderboard.get_leaderboard_data()
        self.assertEqual(len(df), 2)

        # Test filtering by tag
        df_reasoning = self.leaderboard.get_leaderboard_data(tag="Reasoning")
        self.assertEqual(len(df_reasoning), 1)
        self.assertEqual(df_reasoning.iloc[0]["score"], 90.0)

        df_coding = self.leaderboard.get_leaderboard_data(tag="Coding")
        self.assertEqual(len(df_coding), 1)
        self.assertEqual(df_coding.iloc[0]["score"], 80.0)


if __name__ == "__main__":
    unittest.main()