Upload app.py
app.py CHANGED
@@ -13,15 +13,13 @@ from auth import HuggingFaceAuth
 from benchmark_selection import BenchmarkSelector, create_benchmark_selection_ui
 from evaluation_queue import EvaluationQueue, create_model_submission_ui
 from leaderboard import Leaderboard, create_leaderboard_ui
-from model_config import ModelConfigManager, create_community_framework_ui
 from sample_benchmarks import add_sample_benchmarks
 
 # Initialize components in main thread
 db = DynamicHighscoresDB()
 auth_manager = HuggingFaceAuth(db)
 benchmark_selector = BenchmarkSelector(db, auth_manager)
-
-evaluation_queue = EvaluationQueue(db, auth_manager, model_config_manager)
+evaluation_queue = EvaluationQueue(db, auth_manager)
 leaderboard = Leaderboard(db)
 
 # Initialize sample benchmarks if none exist
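Note on the hunk above: with the `model_config` import gone, nothing in `app.py` can construct a `ModelConfigManager`, so the old call `EvaluationQueue(db, auth_manager, model_config_manager)` would no longer resolve; the call site has to drop the third argument to match. For orientation, the constructor this implies might look like the sketch below. `evaluation_queue.py` is not part of this commit, so the parameter names and attributes here are assumptions read off the call site, not the module's actual code.

```python
# Hypothetical sketch of the EvaluationQueue constructor implied by the new
# call site EvaluationQueue(db, auth_manager). evaluation_queue.py is not in
# this commit, so the names and attributes below are assumptions only.
class EvaluationQueue:
    def __init__(self, db, auth_manager):
        self.db = db                      # DynamicHighscoresDB instance
        self.auth_manager = auth_manager  # HuggingFaceAuth instance
        self.queue = []                   # pending evaluation submissions
```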
@@ -322,7 +320,60 @@ with gr.Blocks(css=css, title="Dynamic Highscores") as app:
         benchmark_ui = create_benchmark_selection_ui(benchmark_selector, auth_manager)
 
     with gr.TabItem("🌐 Community Framework", id=3):
-
+        # Create a simple placeholder for the Community Framework tab
+        gr.Markdown("""
+        # 🌐 Dynamic Highscores Community Framework
+
+        ## About Dynamic Highscores
+
+        Dynamic Highscores is an open-source community benchmark system for evaluating language models on any dataset. This project was created to fill the gap left by the retirement of HuggingFace's "Open LLM Leaderboards," which were discontinued due to outdated benchmarks.
+
+        ### Key Features
+
+        - **Flexible Benchmarking**: Test models against any HuggingFace dataset, not just predefined benchmarks
+        - **Community-Driven**: Anyone can add new benchmarks and submit models for evaluation
+        - **Modern Evaluation**: Focus on contemporary benchmarks that better reflect current model capabilities
+        - **CPU-Only Evaluation**: Ensures fair comparisons across different models
+        - **Daily Submission Limits**: Prevents system abuse (one benchmark per day per user)
+        - **Model Tagging**: Categorize models as Merge, Agent, Reasoning, Coding, etc.
+        - **Unified Leaderboard**: View all models, with filtering by tags
+
+        ### Why This Project Matters
+
+        When HuggingFace retired their "Open LLM Leaderboards," the community lost a valuable resource for comparing model performance. The benchmarks used had become outdated and didn't reflect the rapid advances in language model capabilities.
+
+        Dynamic Highscores addresses this issue by allowing users to select from any benchmark on HuggingFace, including the most recent and relevant datasets. This ensures that models are evaluated on tasks that matter for current applications.
+
+        ## Model Configuration System (Coming Soon)
+
+        We're working on a modular system for model configurations that will allow users to:
+
+        - Create and apply predefined configurations for different model types
+        - Define parameters such as Temperature, Top-K, Min-P, Top-P, and Repetition Penalty
+        - Share optimal configurations with the community
+
+        ### Example Configuration (Gemma)
+
+        ```
+        Temperature: 1.0
+        Top_K: 64
+        Min_P: 0.01
+        Top_P: 0.95
+        Repetition Penalty: 1.0
+        ```
+
+        ## Contributing to the Project
+
+        We welcome contributions from the community! If you'd like to improve Dynamic Highscores, here are some ways to get involved:
+
+        - **Add New Features**: Enhance the platform with additional functionality
+        - **Improve Evaluation Methods**: Help make model evaluations more accurate and efficient
+        - **Fix Bugs**: Address issues in the codebase
+        - **Enhance Documentation**: Make the project more accessible to new users
+        - **Add Model Configurations**: Contribute optimal configurations for different model types
+
+        To contribute, fork the repository, make your changes, and submit a pull request. We appreciate all contributions, big or small!
+        """)
 
     gr.Markdown("""
     ### About Dynamic Highscores