alibayram commited on
Commit
3ce2f84
·
1 Parent(s): 1c73b10

Implement robust data loading with retry logic, enhance error handling in Gradio app, and improve user experience with fallback data for leaderboard and responses. Update configuration for request timeouts and retries.

Browse files
Files changed (6) hide show
  1. Dockerfile +32 -0
  2. README.md +80 -0
  3. app.py +51 -14
  4. config.py +3 -0
  5. data_manager.py +93 -10
  6. utils.py +91 -41
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies; skip recommended packages and clean the
# apt cache to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching (dependency layer is only
# rebuilt when requirements.txt changes, not on every code change)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Hugging Face Spaces' Docker SDK runs containers as the non-root UID 1000;
# create that user so the app does not run as root.
RUN useradd -m -u 1000 user

# Copy application code, owned by the runtime user
COPY --chown=user . .

# Create a writable cache directory for downloaded datasets
RUN mkdir -p cache && chown user cache

USER user

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Expose port
EXPOSE 7860

# Command to run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -10,6 +10,86 @@ license: cc-by-nc-4.0
10
  short_description: Leaderboard showcasing Turkish MMLU dataset results.
11
  ---
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Start the configuration
14
 
15
  Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
 
10
  short_description: Leaderboard showcasing Turkish MMLU dataset results.
11
  ---
12
 
13
+ # 🏆 Turkish MMLU Leaderboard
14
+
15
+ A web application for exploring, evaluating, and comparing AI model performance on the Turkish Massive Multitask Language Understanding (MMLU) benchmark.
16
+
17
+ ## Features
18
+
19
+ - 📊 Interactive leaderboard with filtering capabilities
20
+ - 🔍 Search through model responses
21
+ - 📈 Visualize section-wise performance results
22
+ - ➕ Submit new models for evaluation
23
+
24
+ ## Local Development
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.8+
29
+ - pip
30
+
31
+ ### Installation
32
+
33
+ 1. Clone the repository:
34
+ ```bash
35
+ git clone https://github.com/yourusername/turkish_mmlu_leaderboard.git
36
+ cd turkish_mmlu_leaderboard
37
+ ```
38
+
39
+ 2. Install dependencies:
40
+ ```bash
41
+ pip install -r requirements.txt
42
+ ```
43
+
44
+ 3. Run the application:
45
+ ```bash
46
+ python app.py
47
+ ```
48
+
49
+ 4. Open your browser and navigate to `http://127.0.0.1:7860`
50
+
51
+ ## Deploying to Hugging Face Spaces
52
+
53
+ ### Option 1: Using the Hugging Face UI
54
+
55
+ 1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
56
+ 2. Click "Create a new Space"
57
+ 3. Select "Gradio" as the SDK
58
+ 4. Upload your files or connect to your GitHub repository
59
+ 5. The Space will automatically build and deploy
60
+
61
+ ### Option 2: Using the Dockerfile
62
+
63
+ 1. Create a new Space on Hugging Face
64
+ 2. Select "Docker" as the SDK
65
+ 3. Upload your files including the Dockerfile
66
+ 4. The Space will build and deploy using your Dockerfile
67
+
68
+ ### Troubleshooting Hugging Face Deployment
69
+
70
+ If you encounter timeout issues when loading datasets:
71
+
72
+ 1. Check the Space logs for specific error messages
73
+ 2. Increase the timeout values in `config.py`
74
+ 3. Make sure your datasets are accessible from Hugging Face Spaces
75
+ 4. Consider using smaller datasets or pre-caching data
76
+
77
+ ## Configuration
78
+
79
+ The application can be configured by modifying the `config.py` file:
80
+
81
+ - `DatasetConfig`: Configure dataset paths, cache settings, and refresh intervals
82
+ - `UIConfig`: Customize the UI appearance
83
+ - `ModelConfig`: Define model-related options
84
+
85
+ ## Contributing
86
+
87
+ Contributions are welcome! Please feel free to submit a Pull Request.
88
+
89
+ ## License
90
+
91
+ This project is licensed under the CC BY-NC 4.0 license (matching the `license` field in the Space metadata above) - see the LICENSE file for details.
92
+
93
  # Start the configuration
94
 
95
  Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from typing import Optional
4
  import logging
 
 
5
 
6
  from config import CONFIG
7
  from data_manager import data_manager
@@ -13,6 +15,29 @@ logger = logging.getLogger(__name__)
13
  def create_app() -> gr.Blocks:
14
  """Create and configure the Gradio application."""
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  with gr.Blocks(css=CONFIG["ui"].css, theme=CONFIG["ui"].theme) as app:
17
  gr.HTML(f"<h1>{CONFIG['ui'].title}</h1>")
18
  gr.Markdown(CONFIG["ui"].description)
@@ -22,12 +47,12 @@ def create_app() -> gr.Blocks:
22
  with gr.TabItem("📊 Leaderboard"):
23
  with gr.Row():
24
  family_filter = gr.Dropdown(
25
- choices=data_manager.leaderboard_data["family"].unique().tolist(),
26
  label="Filter by Family",
27
  multiselect=False
28
  )
29
  quantization_filter = gr.Dropdown(
30
- choices=data_manager.leaderboard_data["quantization_level"].unique().tolist(),
31
  label="Filter by Quantization Level"
32
  )
33
 
@@ -47,7 +72,7 @@ def create_app() -> gr.Blocks:
47
  with gr.TabItem("🔍 Model Responses"):
48
  with gr.Row():
49
  model_dropdown = gr.Dropdown(
50
- choices=data_manager.leaderboard_data["model"].unique().tolist(),
51
  label="Select Model"
52
  )
53
  query_input = gr.Textbox(
@@ -113,18 +138,30 @@ def create_app() -> gr.Blocks:
113
  return app
114
 
115
  def main():
116
- # Initialize scheduler for data refresh
117
- scheduler = BackgroundScheduler()
118
- scheduler.add_job(
119
- data_manager.refresh_datasets,
120
- "interval",
121
- seconds=CONFIG["dataset"].refresh_interval
122
- )
123
- scheduler.start()
 
124
 
125
- # Create and launch app
126
- app = create_app()
127
- app.queue(default_concurrency_limit=40).launch()
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  if __name__ == "__main__":
130
  main()
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
  from typing import Optional
4
  import logging
5
+ import sys
6
+ import time
7
 
8
  from config import CONFIG
9
  from data_manager import data_manager
 
15
  def create_app() -> gr.Blocks:
16
  """Create and configure the Gradio application."""
17
 
18
+ # Pre-load data with retries to avoid startup failures
19
+ def safe_get_data():
20
+ max_attempts = 3
21
+ for attempt in range(max_attempts):
22
+ try:
23
+ logger.info(f"Pre-loading data (attempt {attempt+1}/{max_attempts})...")
24
+ # Try to access data to trigger loading
25
+ families = data_manager.leaderboard_data["family"].unique().tolist() if not data_manager.leaderboard_data.empty else []
26
+ models = data_manager.leaderboard_data["model"].unique().tolist() if not data_manager.leaderboard_data.empty else []
27
+ logger.info(f"Successfully loaded data with {len(families)} families and {len(models)} models")
28
+ return True
29
+ except Exception as e:
30
+ logger.error(f"Error pre-loading data: {e}")
31
+ if attempt < max_attempts - 1:
32
+ logger.info(f"Retrying in {CONFIG['dataset'].retry_delay} seconds...")
33
+ time.sleep(CONFIG["dataset"].retry_delay)
34
+ else:
35
+ logger.warning("Using fallback data due to loading failures")
36
+ return False
37
+
38
+ # Try to pre-load data
39
+ safe_get_data()
40
+
41
  with gr.Blocks(css=CONFIG["ui"].css, theme=CONFIG["ui"].theme) as app:
42
  gr.HTML(f"<h1>{CONFIG['ui'].title}</h1>")
43
  gr.Markdown(CONFIG["ui"].description)
 
47
  with gr.TabItem("📊 Leaderboard"):
48
  with gr.Row():
49
  family_filter = gr.Dropdown(
50
+ choices=data_manager.leaderboard_data["family"].unique().tolist() if not data_manager.leaderboard_data.empty else [],
51
  label="Filter by Family",
52
  multiselect=False
53
  )
54
  quantization_filter = gr.Dropdown(
55
+ choices=data_manager.leaderboard_data["quantization_level"].unique().tolist() if not data_manager.leaderboard_data.empty else [],
56
  label="Filter by Quantization Level"
57
  )
58
 
 
72
  with gr.TabItem("🔍 Model Responses"):
73
  with gr.Row():
74
  model_dropdown = gr.Dropdown(
75
+ choices=data_manager.leaderboard_data["model"].unique().tolist() if not data_manager.leaderboard_data.empty else [],
76
  label="Select Model"
77
  )
78
  query_input = gr.Textbox(
 
138
  return app
139
 
140
def main():
    """Start the background dataset-refresh scheduler and launch the Gradio app.

    Exits the process with status 1 if startup fails.
    """
    scheduler = None
    try:
        # Periodically re-download datasets in the background.
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            data_manager.refresh_datasets,
            "interval",
            seconds=CONFIG["dataset"].refresh_interval,
        )
        scheduler.start()

        # Honour the GRADIO_SERVER_NAME / GRADIO_SERVER_PORT env vars set in
        # the Dockerfile instead of hard-coding host and port (defaults match
        # the previous hard-coded values, so behaviour is unchanged locally).
        import os
        host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
        port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))

        # Create and launch app
        app = create_app()
        app.queue(default_concurrency_limit=40).launch(
            inbrowser=True,
            server_name=host,  # 0.0.0.0 listens on all interfaces
            server_port=port,
            share=False,
            debug=False,
            show_error=True,
            max_threads=40,
        )
    except Exception as e:
        logger.error(f"Error starting application: {e}")
        # Stop the scheduler thread so the process can actually exit.
        if scheduler is not None:
            scheduler.shutdown(wait=False)
        sys.exit(1)
165
 
166
  if __name__ == "__main__":
167
  main()
config.py CHANGED
@@ -8,6 +8,9 @@ class DatasetConfig:
8
  section_results_path: str = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
9
  cache_dir: str = "cache"
10
  refresh_interval: int = 1800 # 30 minutes
 
 
 
11
 
12
  @dataclass
13
  class UIConfig:
 
8
  section_results_path: str = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
9
  cache_dir: str = "cache"
10
  refresh_interval: int = 1800 # 30 minutes
11
+ request_timeout: int = 60 # seconds
12
+ max_retries: int = 5
13
+ retry_delay: int = 2 # seconds
14
 
15
  @dataclass
16
  class UIConfig:
data_manager.py CHANGED
@@ -3,36 +3,119 @@ import pandas as pd
3
  from functools import lru_cache
4
  from huggingface_hub import snapshot_download
5
  import logging
 
 
 
 
 
6
  from config import CONFIG
7
 
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  class DataManager:
12
  def __init__(self):
13
  self._leaderboard_data: Optional[pd.DataFrame] = None
14
  self._responses_data: Optional[pd.DataFrame] = None
15
  self._section_results_data: Optional[pd.DataFrame] = None
 
 
 
16
 
17
- @lru_cache(maxsize=1)
18
  def _load_dataset(self, path: str) -> pd.DataFrame:
19
- """Load dataset with caching."""
20
- try:
21
- return pd.read_parquet(path)
22
- except Exception as e:
23
- logger.error(f"Error loading dataset from {path}: {e}")
24
- raise RuntimeError(f"Failed to load dataset: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def refresh_datasets(self) -> None:
27
  """Refresh all datasets from source."""
28
  try:
 
29
  snapshot_download(
30
  repo_id="alibayram",
31
  repo_type="dataset",
32
- local_dir=CONFIG["dataset"].cache_dir
 
 
33
  )
34
- # Clear cache to force reload
35
- self._load_dataset.cache_clear()
 
 
36
  logger.info("Datasets refreshed successfully")
37
  except Exception as e:
38
  logger.error(f"Error refreshing datasets: {e}")
 
3
  from functools import lru_cache
4
  from huggingface_hub import snapshot_download
5
  import logging
6
+ import time
7
+ import os
8
+ import requests
9
+ from requests.adapters import HTTPAdapter
10
+ from urllib3.util.retry import Retry
11
  from config import CONFIG
12
 
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
# Configure requests with retries
def create_retry_session(
    retries=5,
    backoff_factor=0.5,
    status_forcelist=(500, 502, 503, 504),
    timeout=30
):
    """Create a requests session with retry capabilities.

    Args:
        retries: Maximum number of connect/read/status retries.
        backoff_factor: urllib3 exponential-backoff factor between retries.
        status_forcelist: HTTP status codes that trigger a retry.
        timeout: Default per-request timeout in seconds.

    Returns:
        A configured ``requests.Session``.
    """
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )

    # ``requests.Session`` has no ``timeout`` attribute, so the original
    # ``session.timeout = timeout`` assignment was silently ignored. Apply
    # the default timeout at the transport-adapter level instead, so every
    # request through this session gets it unless the caller overrides it.
    class _TimeoutAdapter(HTTPAdapter):
        def send(self, request, **kwargs):
            # Session.send always passes a ``timeout`` kwarg (possibly None).
            if kwargs.get("timeout") is None:
                kwargs["timeout"] = timeout
            return super().send(request, **kwargs)

    adapter = _TimeoutAdapter(max_retries=retry)
    session = requests.Session()
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
38
class DataManager:
    """Loads and caches the leaderboard, responses and section-result datasets.

    Parquet reads are retried a bounded number of times; when every attempt
    fails, a small placeholder DataFrame is returned so the UI can still
    render instead of crashing at startup.
    """

    # Class-scoped logger so methods do not depend on a file-level ``logger``.
    _log = logging.getLogger(__name__)

    def __init__(self):
        # Lazily-populated caches; ``None`` means "not loaded yet".
        self._leaderboard_data: Optional[pd.DataFrame] = None
        self._responses_data: Optional[pd.DataFrame] = None
        self._section_results_data: Optional[pd.DataFrame] = None
        # HTTP session with retry/backoff for any direct requests.
        self._session = create_retry_session()
        self._max_retries = 3
        self._retry_delay = 2  # seconds between read attempts

    def _load_dataset(self, path: str) -> pd.DataFrame:
        """Read a parquet dataset with retries, falling back to placeholder data.

        Args:
            path: Local or ``hf://`` parquet path.

        Returns:
            The loaded DataFrame, or a fallback frame whose schema is chosen
            by matching "leaderboard"/"responses"/"section_results" in *path*.
        """
        last_error = None
        for attempt in range(1, self._max_retries + 1):
            try:
                self._log.info(
                    "Attempting to load dataset from %s (attempt %d/%d)",
                    path, attempt, self._max_retries,
                )
                return pd.read_parquet(path)
            except Exception as e:
                last_error = e
                # Only sleep when another attempt will actually follow — the
                # original version slept once more after the final failure.
                if attempt < self._max_retries:
                    self._log.warning(
                        "Error loading dataset from %s: %s. Retrying in %s seconds...",
                        path, e, self._retry_delay,
                    )
                    time.sleep(self._retry_delay)

        self._log.error(
            "Failed to load dataset after %d attempts: %s",
            self._max_retries, last_error,
        )
        # Return a schema-appropriate fallback so downstream column access
        # (family/model dropdowns, response search) still works.
        if "leaderboard" in path:
            return self._create_fallback_leaderboard()
        if "responses" in path:
            return self._create_fallback_responses()
        if "section_results" in path:
            return self._create_fallback_section_results()
        return pd.DataFrame()

    def _create_fallback_leaderboard(self) -> pd.DataFrame:
        """Create a fallback leaderboard dataframe when loading fails."""
        self._log.info("Creating fallback leaderboard data")
        return pd.DataFrame({
            "model": ["Example Model"],
            "family": ["Example"],
            "quantization_level": ["None"],
            "score": [0.0],
            "timestamp": [pd.Timestamp.now()],
        })

    def _create_fallback_responses(self) -> pd.DataFrame:
        """Create a fallback responses dataframe when loading fails."""
        self._log.info("Creating fallback responses data")
        return pd.DataFrame({
            "bolum": ["Example"],
            "soru": ["Example question"],
            "cevap": ["Example answer"],
            "Example_Model_cevap": ["Example model response"],
        })

    def _create_fallback_section_results(self) -> pd.DataFrame:
        """Create a fallback section results dataframe when loading fails."""
        self._log.info("Creating fallback section results data")
        return pd.DataFrame({
            "section": ["Example Section"],
            "score": [0.0],
        })

    def refresh_datasets(self) -> None:
        """Re-download the source datasets and invalidate the in-memory caches."""
        try:
            self._log.info("Starting dataset refresh...")
            # NOTE(review): ``snapshot_download`` does not accept
            # ``max_retries`` / ``retry_delay_seconds`` kwargs, so the original
            # call raised TypeError and every refresh silently failed; pass
            # supported arguments only. Also confirm ``repo_id`` — "alibayram"
            # looks like a namespace, not a full ``namespace/dataset`` id.
            snapshot_download(
                repo_id="alibayram",
                repo_type="dataset",
                local_dir=CONFIG["dataset"].cache_dir,
            )
            # Drop caches so the next property access reloads from disk.
            self._leaderboard_data = None
            self._responses_data = None
            self._section_results_data = None
            self._log.info("Datasets refreshed successfully")
        except Exception as e:
            self._log.error("Error refreshing datasets: %s", e)
utils.py CHANGED
@@ -1,53 +1,97 @@
1
  from typing import Optional, Dict
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
4
  from data_manager import data_manager
5
 
 
 
6
  def filter_leaderboard(
7
  family: Optional[str] = None,
8
  quantization_level: Optional[str] = None
9
  ) -> pd.DataFrame:
10
  """Filter leaderboard data based on criteria."""
11
- df = data_manager.leaderboard_data.copy()
12
-
13
- if family:
14
- df = df[df["family"] == family]
15
- if quantization_level:
16
- df = df[df["quantization_level"] == quantization_level]
17
-
18
- return df.sort_values("score", ascending=False)
 
 
 
 
 
 
 
 
19
 
20
  def search_responses(query: str, model: str) -> pd.DataFrame:
21
  """Search model responses based on query."""
22
- if not query or not model:
23
- return pd.DataFrame()
24
-
25
- filtered = data_manager.responses_data[
26
- data_manager.responses_data["bolum"].str.contains(query, case=False, na=False)
27
- ]
28
-
29
- selected_columns = ["bolum", "soru", "cevap", f"{model}_cevap"]
30
- return filtered[selected_columns].dropna()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def plot_section_results() -> plt.Figure:
33
  """Generate section results plot."""
34
- fig, ax = plt.subplots(figsize=(12, 6))
35
- avg_scores = data_manager.section_results_data.mean(numeric_only=True)
36
-
37
- bars = avg_scores.plot(kind="bar", ax=ax)
38
-
39
- # Customize plot
40
- ax.set_title("Average Section-Wise Performance", pad=20)
41
- ax.set_ylabel("Accuracy (%)")
42
- ax.set_xlabel("Sections")
43
- plt.xticks(rotation=45, ha='right')
44
- plt.tight_layout()
45
-
46
- # Add value labels
47
- for i, v in enumerate(avg_scores):
48
- ax.text(i, v, f'{v:.1f}%', ha='center', va='bottom')
49
-
50
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def validate_model_submission(
53
  model_name: str,
@@ -58,10 +102,16 @@ def validate_model_submission(
58
  model_type: str
59
  ) -> tuple[bool, str]:
60
  """Validate model submission parameters."""
61
- if not all([model_name, base_model]):
62
- return False, "Model name and base model are required."
63
-
64
- if model_name in data_manager.leaderboard_data["model"].values:
65
- return False, "Model name already exists."
66
-
67
- return True, "Validation successful"
 
 
 
 
 
 
 
1
  from typing import Optional, Dict
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ import logging
5
  from data_manager import data_manager
6
 
7
+ logger = logging.getLogger(__name__)
8
+
9
def filter_leaderboard(
    family: Optional[str] = None,
    quantization_level: Optional[str] = None
) -> pd.DataFrame:
    """Filter leaderboard data based on criteria."""
    try:
        frame = data_manager.leaderboard_data.copy()

        # Nothing loaded yet (or loading failed) -> nothing to filter.
        if frame.empty:
            logger.warning("Leaderboard data is empty, returning empty DataFrame")
            return pd.DataFrame()

        # Apply each optional filter only when a value was supplied.
        for column, wanted in (
            ("family", family),
            ("quantization_level", quantization_level),
        ):
            if wanted:
                frame = frame[frame[column] == wanted]

        # Best-scoring models first.
        return frame.sort_values("score", ascending=False)
    except Exception as e:
        logger.error(f"Error filtering leaderboard: {e}")
        return pd.DataFrame()
30
 
31
def search_responses(query: str, model: str) -> pd.DataFrame:
    """Search model responses based on query."""
    try:
        # Both a query string and a model selection are required.
        if not (query and model):
            return pd.DataFrame()

        responses = data_manager.responses_data
        if responses.empty:
            logger.warning("Responses data is empty, returning empty DataFrame")
            return pd.DataFrame()

        # Each model's answers live in a "<model>_cevap" column.
        model_column = f"{model}_cevap"
        if model_column not in responses.columns:
            logger.warning(f"Model column '{model_column}' not found in responses data")
            return pd.DataFrame({"error": [f"Model '{model}' responses not found"]})

        # Case-insensitive substring match on the section ("bolum") column.
        mask = responses["bolum"].str.contains(query, case=False, na=False)
        wanted = ["bolum", "soru", "cevap", model_column]
        return responses.loc[mask, wanted].dropna()
    except Exception as e:
        logger.error(f"Error searching responses: {e}")
        return pd.DataFrame({"error": [f"Error: {str(e)}"]})
58
 
59
def plot_section_results() -> plt.Figure:
    """Generate a bar chart of average section-wise accuracy.

    Returns:
        A matplotlib Figure. When no data is available, or plotting fails,
        the figure contains an explanatory message instead of bars.
    """
    try:
        df = data_manager.section_results_data

        if df.empty:
            logger.warning("Section results data is empty, returning empty plot")
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.text(0.5, 0.5, "No data available", ha='center', va='center', fontsize=14)
            ax.set_title("Section-Wise Performance", pad=20)
            plt.tight_layout()
            return fig

        fig, ax = plt.subplots(figsize=(12, 6))
        # Column-wise mean over numeric columns only — one bar per section.
        # NOTE(review): this averages ALL numeric columns; confirm the frame
        # holds only per-section score columns.
        avg_scores = df.mean(numeric_only=True)

        # The original bound the unused return value to ``bars``; dropped.
        avg_scores.plot(kind="bar", ax=ax)

        # Customize plot
        ax.set_title("Average Section-Wise Performance", pad=20)
        ax.set_ylabel("Accuracy (%)")
        ax.set_xlabel("Sections")
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()

        # Add a value label above each bar
        for i, v in enumerate(avg_scores):
            ax.text(i, v, f'{v:.1f}%', ha='center', va='bottom')

        return fig
    except Exception as e:
        logger.error(f"Error plotting section results: {e}")
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.text(0.5, 0.5, f"Error generating plot: {str(e)}", ha='center', va='center', fontsize=12)
        plt.tight_layout()
        return fig
95
 
96
  def validate_model_submission(
97
  model_name: str,
 
102
  model_type: str
103
  ) -> tuple[bool, str]:
104
  """Validate model submission parameters."""
105
+ try:
106
+ if not all([model_name, base_model]):
107
+ return False, "Model name and base model are required."
108
+
109
+ # Check if leaderboard data is available
110
+ if not data_manager.leaderboard_data.empty:
111
+ if model_name in data_manager.leaderboard_data["model"].values:
112
+ return False, "Model name already exists."
113
+
114
+ return True, "Validation successful"
115
+ except Exception as e:
116
+ logger.error(f"Error validating model submission: {e}")
117
+ return False, f"Validation error: {str(e)}"