|
""" |
|
Main application for Dynamic Highscores system. |
|
|
|
This file integrates all components into a unified application. |
|
""" |
|
|
|
import os |
|
import gradio as gr |
|
import threading |
|
import time |
|
|
|
|
|
# Fallback OAuth settings for Hugging Face login.
#
# These are applied before the project modules below are imported.  They are
# written with ``setdefault`` so that a value already present in the process
# environment (for example, credentials supplied by the hosting platform) is
# kept rather than being replaced by the placeholder values here.
_OAUTH_ENV_DEFAULTS = {
    "OAUTH_CLIENT_ID": "hf",
    "OAUTH_CLIENT_SECRET": "hf",
    "OAUTH_AUTHORIZATION_URL": "https://huggingface.co/oauth/authorize",
    "OAUTH_TOKEN_URL": "https://huggingface.co/oauth/token",
    "OAUTH_SCOPES": "inference",
    "OPENID_PROVIDER_URL": "https://huggingface.co",
}
for _env_name, _env_value in _OAUTH_ENV_DEFAULTS.items():
    os.environ.setdefault(_env_name, _env_value)
|
|
|
from database_schema import DynamicHighscoresDB |
|
from auth import HuggingFaceAuth, create_login_ui, setup_auth_handlers |
|
from benchmark_selection import BenchmarkSelector, create_benchmark_selection_ui |
|
from evaluation_queue import EvaluationQueue, create_model_submission_ui |
|
from leaderboard import Leaderboard, create_leaderboard_ui |
|
from sample_benchmarks import add_sample_benchmarks |
|
|
|
|
|
# Application-wide service objects.  A single database handle is created and
# handed to every component that needs it.
db = DynamicHighscoresDB()
auth_manager = HuggingFaceAuth(db)
benchmark_selector = BenchmarkSelector(db, auth_manager)
evaluation_queue = EvaluationQueue(db, auth_manager)
leaderboard = Leaderboard(db)

# Seed sample benchmarks when the database reports none.
print("Checking for existing benchmarks...")
benchmarks = db.get_benchmarks()
if benchmarks:
    print(f"Found {len(benchmarks)} existing benchmarks.")
else:
    print("No benchmarks found. Adding sample benchmarks...")
    try:
        print(f"Database path: {db.db_path}")
        num_added = add_sample_benchmarks()
        print(f"Added {num_added} sample benchmarks.")
    except Exception as seed_error:
        print(f"Error adding sample benchmarks: {seed_error}")

        # The bulk seeding failed; try to insert one benchmark directly so
        # that at least a single entry exists.  Failure here is only logged.
        try:
            print("Attempting direct benchmark insertion...")
            db.add_benchmark(
                name="MMLU (Massive Multitask Language Understanding)",
                dataset_id="cais/mmlu",
                description="Tests knowledge across 57 subjects",
            )
            print("Added fallback benchmark.")
        except Exception as fallback_error:
            print(f"Fallback insertion failed: {fallback_error}")
|
|
|
|
|
# Custom stylesheet passed to ``gr.Blocks(css=...)``.  It styles the info box,
# header, footer and login area, and forces readable text colours in both
# light and dark themes.
css = """
/* Theme-adaptive colored info box */
.info-text {
    background-color: rgba(53, 130, 220, 0.1);
    padding: 12px;
    border-radius: 8px;
    border-left: 4px solid #3498db;
    margin: 12px 0;
}

/* High-contrast text for elements - works in light and dark themes */
.info-text, .header, .footer, .tab-content,
button, input, textarea, select, option,
.gradio-container *, .markdown-text {
    color: var(--text-color, inherit) !important;
}

/* Container styling */
.container {
    max-width: 1200px;
    margin: 0 auto;
}

/* Header styling */
.header {
    text-align: center;
    margin-bottom: 20px;
    font-weight: bold;
    font-size: 24px;
}

/* Footer styling */
.footer {
    text-align: center;
    margin-top: 40px;
    padding: 20px;
    border-top: 1px solid var(--border-color-primary, #eee);
}

/* Login section styling */
.login-section {
    padding: 10px;
    margin-bottom: 15px;
    border-radius: 8px;
    background-color: rgba(250, 250, 250, 0.1);
    text-align: center;
}

/* Login button styling */
.login-button {
    background-color: #4CAF50 !important;
    color: white !important;
    font-weight: bold;
}

/* Force high contrast on specific input areas */
input[type="text"], input[type="password"], textarea {
    background-color: var(--background-fill-primary) !important;
    color: var(--body-text-color) !important;
}

/* Force text visibility in multiple contexts */
.gradio-markdown p, .gradio-markdown h1, .gradio-markdown h2,
.gradio-markdown h3, .gradio-markdown h4, .gradio-markdown li {
    color: var(--body-text-color) !important;
}

/* Fix dark mode text visibility */
@media (prefers-color-scheme: dark) {
    input, textarea, select {
        color: #ffffff !important;
    }

    ::placeholder {
        color: rgba(255, 255, 255, 0.5) !important;
    }
}
"""
|
|
|
|
|
def is_running_in_hf_space() -> bool:
    """Return True when the ``SPACE_ID`` environment variable is defined.

    Only the presence of the variable is checked; an empty value still counts.
    """
    space_id = os.environ.get("SPACE_ID")
    return space_id is not None
|
|
|
|
|
def trigger_hf_oauth() -> str:
    """Return an HTML snippet that redirects the browser to the Space login.

    The snippet is a ``<script>`` element that sets ``window.location.href``
    to the page origin with ``?__space_auth_callback=true`` appended.

    NOTE(review): this relies on the front end executing a script delivered
    through an HTML output component -- confirm that it actually runs there.

    Returns:
        The HTML/JavaScript snippet as a string.
    """
    return """
    <script>
    // Redirect to HuggingFace Space login
    window.location.href = window.location.origin + "?__space_auth_callback=true";
    </script>
    """
|
|
|
|
|
def start_queue_worker(startup_delay: float = 2) -> None:
    """Start the evaluation queue worker after an optional delay.

    Any exception raised while starting the worker is caught and printed, so
    this function does not propagate errors to its caller.

    Args:
        startup_delay: Seconds to wait before starting the worker.  Defaults
            to 2, the delay that was previously hard-coded.  A value of zero
            or less skips the wait.
    """
    if startup_delay > 0:
        time.sleep(startup_delay)

    try:
        print("Starting evaluation queue worker...")
        evaluation_queue.start_worker()
    except Exception as e:
        print(f"Error starting queue worker: {e}")
|
|
|
|
|
# Build the Gradio interface: login area, header, the three feature tabs, the
# footer, and the event wiring that updates the login area on page load.
#
# NOTE(review): the icon characters in the labels below were reconstructed
# from mis-decoded text; confirm they match the intended glyphs.
with gr.Blocks(css=css, title="Dynamic Highscores") as app:
    login_info = gr.LoginButton(visible=True)
    # Holds the value returned for the user by check_auth_on_load
    # (None when nobody is logged in).
    auth_status = gr.State(None)

    with gr.Row(visible=True, elem_classes=["login-section"]) as login_section:
        with gr.Column():
            login_status = gr.Markdown("### 🔒 Not logged in", elem_id="login-status")
            login_button = gr.Button(
                "Login with HuggingFace",
                size="lg",
                variant="primary",
                elem_classes=["login-button"],
            )

    gr.Markdown("# 🏆 Dynamic Highscores", elem_classes=["header"])
    gr.Markdown("""
    Welcome to Dynamic Highscores - a community benchmark platform for evaluating and comparing language models.

    - **Add your own benchmarks** from HuggingFace datasets
    - **Submit your models** for CPU-only evaluation
    - **Compare performance** across different models and benchmarks
    - **Filter results** by model type (Merge, Agent, Reasoning, Coding, etc.)
    """, elem_classes=["info-text"])

    with gr.Tabs() as tabs:
        with gr.TabItem("📊 Leaderboard", id=0):
            leaderboard_ui = create_leaderboard_ui(leaderboard, db)

        with gr.TabItem("🚀 Submit Model", id=1):
            submission_ui = create_model_submission_ui(evaluation_queue, auth_manager, db)

        with gr.TabItem("🔍 Benchmarks", id=2):
            benchmark_ui = create_benchmark_selection_ui(benchmark_selector, auth_manager)

    gr.Markdown("""
    ### About Dynamic Highscores

    This platform allows users to select benchmarks from HuggingFace datasets and evaluate models against them.
    Each user can submit one benchmark per day (admin users are exempt from this limit).
    All evaluations run on CPU only to ensure fair comparisons.

    Created by Quazim0t0
    """, elem_classes=["footer"])

    def check_auth_on_load(request: gr.Request):
        """Resolve the current user on page load and update the login area.

        When running in a Space and the request carries an ``HF-User``
        header, the matching database user is looked up and created if
        missing.  The username "Quazim0t0" is created as an admin.

        NOTE(review): the identity comes straight from a request header;
        confirm that the hosting proxy sets it and that clients cannot
        supply it themselves.

        Args:
            request: The Gradio request for the page load.

        Returns:
            A 3-tuple ``(user, status_markdown, login_section_update)``.
            ``user`` is None and the login section stays visible when no
            user is detected.
        """
        if is_running_in_hf_space():
            username = request.headers.get("HF-User")
            if username:
                print(f"Detected logged-in user via Space OAuth: {username}")

                user = db.get_user_by_username(username)
                if not user:
                    print(f"Creating new user: {username}")
                    is_admin = (username == "Quazim0t0")
                    db.add_user(username, username, is_admin)
                    user = db.get_user_by_username(username)

                return user, f"### ✅ Logged in as {username}", gr.update(visible=False)

        return None, "### 🔒 Not logged in", gr.update(visible=True)

    # The login button renders the redirect snippet into a fresh HTML output.
    login_button.click(
        fn=trigger_hf_oauth,
        inputs=[],
        outputs=[gr.HTML()]
    )

    # On every page load, refresh the stored user, the status text and the
    # visibility of the login section.
    app.load(
        fn=check_auth_on_load,
        inputs=[],
        outputs=[auth_status, login_status, login_section]
    )
|
|
|
|
|
if __name__ == "__main__":
    # Start the queue worker in a background daemon thread, then launch the
    # Gradio app in the main thread.
    queue_thread = threading.Thread(target=start_queue_worker, daemon=True)
    queue_thread.start()

    app.launch()