|
""" |
|
Main application for the Dynamic Highscores system.

This file integrates all components into a unified application.
|
""" |
|
|
|
import os |
|
import gradio as gr |
|
import threading |
|
import queue |
|
from database_schema import init_db |
|
from auth import HuggingFaceAuth, create_login_ui, setup_auth_handlers |
|
from benchmark_selection import BenchmarkSelector, create_benchmark_selection_ui |
|
from evaluation_queue import EvaluationQueue, create_model_submission_ui |
|
from leaderboard import Leaderboard, create_leaderboard_ui |
|
from sample_benchmarks import add_sample_benchmarks |
|
|
|
|
|
# Initialize the database handle that every component below is built on.
db = init_db()

# Core service objects; each receives the same database handle, and the
# selector and the evaluation queue additionally receive the auth manager.
auth_manager = HuggingFaceAuth(db)
benchmark_selector = BenchmarkSelector(db, auth_manager)
evaluation_queue = EvaluationQueue(db, auth_manager)
leaderboard = Leaderboard(db)

# Seed the sample benchmarks when the database reports none.
# Plain truthiness covers both None and an empty collection, so no separate
# length check is needed.
benchmarks = db.get_benchmarks()
if not benchmarks:
    print("No benchmarks found. Adding sample benchmarks...")
    num_added = add_sample_benchmarks()
    print(f"Added {num_added} sample benchmarks.")
|
|
|
|
|
# Custom stylesheet handed to gr.Blocks(css=...). The class names defined here
# are the ones referenced through `elem_classes` in the layout below
# ("info-text", "header", "footer"); "container" is defined but not referenced
# in this file.
css = """
.info-text {
    background-color: #f0f7ff;
    padding: 12px;
    border-radius: 8px;
    border-left: 4px solid #3498db;
    margin: 12px 0;
}

.container {
    max-width: 1200px;
    margin: 0 auto;
}

.header {
    text-align: center;
    margin-bottom: 20px;
}

.footer {
    text-align: center;
    margin-top: 40px;
    padding: 20px;
    border-top: 1px solid #eee;
}
"""
|
|
|
|
|
# Introductory text shown in the "info-text" box at the top of the page.
_INTRO_MARKDOWN = """
Welcome to Dynamic Highscores - a community benchmark platform for evaluating and comparing language models.

- **Add your own benchmarks** from HuggingFace datasets
- **Submit your models** for CPU-only evaluation
- **Compare performance** across different models and benchmarks
- **Filter results** by model type (Merge, Agent, Reasoning, Coding, etc.)
"""

# Footer text shown below the tabs.
_ABOUT_MARKDOWN = """
### About Dynamic Highscores

This platform allows users to select benchmarks from HuggingFace datasets and evaluate models against them.
Each user can submit one benchmark per day (admin users are exempt from this limit).
All evaluations run on CPU only to ensure fair comparisons.

Created by Quazim0t0
"""

# Build the Gradio interface. Components are created in display order:
# header, intro box, login controls, the three tabs, then the footer.
# NOTE(review): the "π" glyph in the title and tab labels looks like a
# mis-decoded emoji; the strings are left unchanged here — confirm the
# intended icons and restore them.
with gr.Blocks(css=css, title="Dynamic Highscores") as app:
    gr.Markdown("# π Dynamic Highscores", elem_classes=["header"])
    gr.Markdown(_INTRO_MARKDOWN, elem_classes=["info-text"])

    # Login controls are created first, then wired to the auth manager.
    login_button, logout_button, token_input, user_info = create_login_ui()
    setup_auth_handlers(login_button, logout_button, token_input, user_info, auth_manager)

    # One tab per feature area; each tab's content comes from its own module.
    with gr.Tabs() as tabs:
        with gr.TabItem("π Leaderboard", id=0):
            leaderboard_ui = create_leaderboard_ui(leaderboard, db)

        with gr.TabItem("π Submit Model", id=1):
            submission_ui = create_model_submission_ui(evaluation_queue, auth_manager, db)

        with gr.TabItem("π Benchmarks", id=2):
            benchmark_ui = create_benchmark_selection_ui(benchmark_selector, auth_manager)

    gr.Markdown(_ABOUT_MARKDOWN, elem_classes=["footer"])
|
|
|
|
|
|
|
def start_queue_worker():
    """Start the evaluation queue worker after a short pause.

    Sleeps for two seconds and then calls ``evaluation_queue.start_worker()``.
    Used as the target of the background thread created in the ``__main__``
    guard.
    """
    from time import sleep

    # NOTE(review): the pause presumably gives the Gradio app time to come up
    # before evaluations begin — confirm.
    startup_delay_seconds = 2
    sleep(startup_delay_seconds)
    evaluation_queue.start_worker()
|
|
|
|
|
if __name__ == "__main__":
    # Run the queue worker on a background daemon thread so it does not keep
    # the process alive once the main thread exits.
    queue_thread = threading.Thread(target=start_queue_worker, daemon=True)
    queue_thread.start()

    # Serve the Gradio app from the main thread.
    app.launch()
|
|