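# Spaces: Sleeping
# (The line above appears to be hosting-page status text captured together
# with the source; it is not part of the program.)
"""
Main application for the Dynamic Highscores system.

This file integrates all components into a unified application.
"""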
import os
import threading
import time
from typing import Optional

import gradio as gr

from database_schema import DynamicHighscoresDB
from auth import HuggingFaceAuth, create_login_ui, setup_auth_handlers
from benchmark_selection import BenchmarkSelector, create_benchmark_selection_ui
from evaluation_queue import EvaluationQueue, create_model_submission_ui
from leaderboard import Leaderboard, create_leaderboard_ui
from sample_benchmarks import add_sample_benchmarks
# Request the "inference" OAuth scope.
# NOTE(review): presumably read by the Hugging Face OAuth integration when the
# app starts -- confirm that setting it here (rather than in the Space
# configuration) has the intended effect.
os.environ["OAUTH_SCOPES"] = "inference"

# Build the shared application components once, in the main thread.
# Every component receives the same database handle; the benchmark selector
# and the evaluation queue additionally receive the auth manager.
db = DynamicHighscoresDB()
auth_manager = HuggingFaceAuth(db)
benchmark_selector = BenchmarkSelector(db, auth_manager)
evaluation_queue = EvaluationQueue(db, auth_manager)
leaderboard = Leaderboard(db)
# Seed the database with sample benchmarks when it does not contain any yet.
print("Checking for existing benchmarks...")
benchmarks = db.get_benchmarks()
if benchmarks:
    print(f"Found {len(benchmarks)} existing benchmarks.")
else:
    # Covers both "no result" (None) and an empty collection.
    print("No benchmarks found. Adding sample benchmarks...")
    try:
        # Print the database path so a misconfigured location is easy to spot
        print(f"Database path: {db.db_path}")
        num_added = add_sample_benchmarks()
        print(f"Added {num_added} sample benchmarks.")
    except Exception as e:
        # Best effort: seeding must never prevent the app from starting.
        print(f"Error adding sample benchmarks: {e}")
        # Fall back to inserting a single benchmark directly through the DB
        try:
            print("Attempting direct benchmark insertion...")
            db.add_benchmark(
                name="MMLU (Massive Multitask Language Understanding)",
                dataset_id="cais/mmlu",
                description="Tests knowledge across 57 subjects",
            )
            print("Added fallback benchmark.")
        except Exception as inner_e:
            print(f"Fallback insertion failed: {inner_e}")
# Stylesheet handed to gr.Blocks(css=...). Colours are taken from theme CSS
# variables where possible so the rules work in both light and dark themes.
css = """
/* Theme-adaptive colored info box */
.info-text {
    background-color: rgba(53, 130, 220, 0.1);
    padding: 12px;
    border-radius: 8px;
    border-left: 4px solid #3498db;
    margin: 12px 0;
}
/* High-contrast text for elements - works in light and dark themes */
.info-text, .header, .footer, .tab-content,
button, input, textarea, select, option,
.gradio-container *, .markdown-text {
    color: var(--text-color, inherit) !important;
}
/* Container styling */
.container {
    max-width: 1200px;
    margin: 0 auto;
}
/* Header styling */
.header {
    text-align: center;
    margin-bottom: 20px;
    font-weight: bold;
    font-size: 24px;
}
/* Footer styling */
.footer {
    text-align: center;
    margin-top: 40px;
    padding: 20px;
    border-top: 1px solid var(--border-color-primary, #eee);
}
/* Login section styling */
.login-section {
    padding: 10px;
    margin-bottom: 15px;
    border-radius: 8px;
    background-color: rgba(250, 250, 250, 0.1);
    text-align: center;
}
/* Login button styling */
.login-button {
    background-color: #4CAF50 !important;
    color: white !important;
    font-weight: bold;
}
/* Force high contrast on specific input areas */
input[type="text"], input[type="password"], textarea {
    background-color: var(--background-fill-primary) !important;
    color: var(--body-text-color) !important;
}
/* Force text visibility in multiple contexts */
.gradio-markdown p, .gradio-markdown h1, .gradio-markdown h2,
.gradio-markdown h3, .gradio-markdown h4, .gradio-markdown li {
    color: var(--body-text-color) !important;
}
/* Fix dark mode text visibility */
@media (prefers-color-scheme: dark) {
    input, textarea, select {
        color: #ffffff !important;
    }
    ::placeholder {
        color: rgba(255, 255, 255, 0.5) !important;
    }
}
"""
def is_running_in_hf_space():
    """Report whether the process is running inside a Hugging Face Space.

    Returns:
        bool: True when the ``SPACE_ID`` environment variable is present
        (with any value, including an empty string), False otherwise.
    """
    # Environment values are always strings, so "not None" means "is set".
    return os.environ.get('SPACE_ID') is not None
def trigger_hf_oauth():
    """Build the HTML snippet used to start the Hugging Face login redirect.

    Returns:
        str: A ``<script>`` element that sets ``window.location.href`` to the
        page origin with ``?__auth_callback=true`` appended.
    """
    redirect_snippet = """
    <script>
    // Redirect to HuggingFace Space login
    window.location.href = window.location.origin + "?__auth_callback=true";
    </script>
    """
    return redirect_snippet
def start_queue_worker():
    """Start the evaluation queue worker after a short start-up delay.

    Sleeps for two seconds to give the app a moment to initialise, then calls
    ``evaluation_queue.start_worker()``. Any exception raised while starting
    the worker is printed and otherwise ignored, so a failure here does not
    propagate to the caller.
    """
    time.sleep(2)
    try:
        print("Starting evaluation queue worker...")
        evaluation_queue.start_worker()
    except Exception as exc:
        print(f"Error starting queue worker: {exc}")
# Create Gradio app
with gr.Blocks(css=css, title="Dynamic Highscores", theme=gr.themes.Soft()) as app:
    # Built-in Hugging Face OAuth button provided by Gradio
    gr.LoginButton(min_width=250, size="lg", variant="primary")
    auth_status = gr.State(None)  # Store user auth state

    # Prominent login section at the top; hidden once a user is recognised
    with gr.Row(visible=True, elem_classes=["login-section"]) as login_section:
        with gr.Column():
            login_status = gr.Markdown("### 🔒 Not logged in", elem_id="login-status")
            login_button = gr.Button(
                "Login with HuggingFace",
                size="lg",
                variant="primary",
                elem_classes=["login-button"],
            )

    gr.Markdown("# 🏆 Dynamic Highscores", elem_classes=["header"])
    gr.Markdown("""
    Welcome to Dynamic Highscores - a community benchmark platform for evaluating and comparing language models.
    - **Add your own benchmarks** from HuggingFace datasets
    - **Submit your models** for CPU-only evaluation
    - **Compare performance** across different models and benchmarks
    - **Filter results** by model type (Merge, Agent, Reasoning, Coding, etc.)
    """, elem_classes=["info-text"])

    # Main tabs
    with gr.Tabs() as tabs:
        with gr.TabItem("📊 Leaderboard", id=0):
            leaderboard_ui = create_leaderboard_ui(leaderboard, db)
        with gr.TabItem("🚀 Submit Model", id=1):
            submission_ui = create_model_submission_ui(evaluation_queue, auth_manager, db)
        with gr.TabItem("🔍 Benchmarks", id=2):
            benchmark_ui = create_benchmark_selection_ui(benchmark_selector, auth_manager)

    gr.Markdown("""
    ### About Dynamic Highscores
    This platform allows users to select benchmarks from HuggingFace datasets and evaluate models against them.
    Each user can submit one benchmark per day (admin users are exempt from this limit).
    All evaluations run on CPU only to ensure fair comparisons.
    Created by Quazim0t0
    """, elem_classes=["footer"])

    def check_auth_on_load(profile: Optional[gr.OAuthProfile]):
        """Resolve the signed-in user when the page loads.

        The identity comes from the OAuth profile that Gradio injects for
        parameters annotated with ``gr.OAuthProfile``. A request header is
        deliberately not used for this: headers are supplied by the client,
        so a forged value could impersonate any user, including the admin.

        Args:
            profile: Hugging Face OAuth profile of the visitor, or ``None``
                when nobody is signed in.

        Returns:
            A 3-tuple matching the ``app.load`` outputs: the user record from
            the database (or ``None``), the Markdown text for the login
            status, and an update that shows or hides the login section.
        """
        username = profile.username if profile is not None else None
        # Only treat the visitor as logged in when running inside a Space
        if is_running_in_hf_space() and username:
            print(f"Detected logged-in user via Space OAuth: {username}")
            # Get or create user
            user = db.get_user_by_username(username)
            if not user:
                print(f"Creating new user: {username}")
                is_admin = (username == "Quazim0t0")  # Replace with your admin username
                db.add_user(username, username, is_admin)
                user = db.get_user_by_username(username)
            # Update UI for logged in state
            return user, f"### ✅ Logged in as {username}", gr.update(visible=False)
        # Not logged in - make sure login section is visible
        return None, "### 🔒 Not logged in", gr.update(visible=True)

    # Connect event handlers.
    # The redirect snippet is written into an anonymous HTML component.
    # NOTE(review): browsers generally do not execute <script> tags that are
    # inserted as HTML content, so this redirect may never fire -- confirm, and
    # consider relying on the gr.LoginButton above for the actual login flow.
    login_button.click(
        fn=trigger_hf_oauth,
        inputs=[],
        outputs=[gr.HTML()]
    )

    # Check auth on load
    app.load(
        fn=check_auth_on_load,
        inputs=[],
        outputs=[auth_status, login_status, login_section]
    )
# Script entry point: start the background worker, then serve the UI.
if __name__ == "__main__":
    # The queue worker runs in its own daemon thread, so it never keeps the
    # interpreter alive once the main thread exits.
    queue_thread = threading.Thread(target=start_queue_worker, daemon=True)
    queue_thread.start()
    app.launch()