File size: 6,940 Bytes
ab59957
43ee4de
1c73b10
 
3ce2f84
 
43ee4de
1c73b10
 
 
ab59957
1c73b10
 
ab59957
1c73b10
 
 
3ce2f84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c73b10
 
 
a7fa922
1c73b10
 
 
 
 
3ce2f84
1c73b10
 
 
 
3ce2f84
1c73b10
 
 
 
 
 
 
 
 
 
 
 
 
 
3b81b14
1c73b10
 
 
 
3ce2f84
1c73b10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43ee4de
1c73b10
 
 
 
a7fa922
1c73b10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b81b14
1c73b10
a7fa922
1c73b10
3ce2f84
 
 
 
 
 
 
 
 
a7fa922
3ce2f84
 
 
 
 
 
 
 
 
 
 
 
 
 
a7fa922
1c73b10
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from typing import Optional
import logging
import sys
import time

from config import CONFIG
from data_manager import data_manager
from utils import filter_leaderboard, search_responses, plot_section_results, validate_model_submission

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by create_app() and main().
logger = logging.getLogger(__name__)

def create_app() -> gr.Blocks:
    """Create and configure the Gradio application.

    Builds a four-tab interface (leaderboard, model responses, section
    results, model submission) from ``CONFIG`` and ``data_manager`` and wires
    the button callbacks imported from ``utils``.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """

    def unique_values(column: str) -> list:
        """Return the distinct values of a leaderboard column, or [] when the frame is empty."""
        # Read the attribute once so the emptiness check and the column
        # lookup are guaranteed to see the same frame.
        frame = data_manager.leaderboard_data
        if frame.empty:
            return []
        return frame[column].unique().tolist()

    def safe_get_data() -> bool:
        """Touch the leaderboard data, retrying on failure.

        Makes up to three attempts, sleeping ``CONFIG["dataset"].retry_delay``
        seconds between failed attempts.

        Returns:
            True as soon as one attempt succeeds, False if every attempt fails.
        """
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                logger.info("Pre-loading data (attempt %d/%d)...", attempt, max_attempts)
                # Accessing the data is what triggers loading.
                families = unique_values("family")
                models = unique_values("model")
            except Exception as e:
                # logger.exception keeps the traceback, which logger.error dropped.
                logger.exception("Error pre-loading data: %s", e)
                if attempt < max_attempts:
                    delay = CONFIG["dataset"].retry_delay
                    logger.info("Retrying in %s seconds...", delay)
                    time.sleep(delay)
            else:
                logger.info(
                    "Successfully loaded data with %d families and %d models",
                    len(families),
                    len(models),
                )
                return True

        logger.warning("Using fallback data due to loading failures")
        return False

    # Warm up the data before building the UI. The outcome is only logged;
    # the interface is built either way.
    safe_get_data()

    with gr.Blocks(css=CONFIG["ui"].css, theme=CONFIG["ui"].theme) as app:
        gr.HTML(f"<h1>{CONFIG['ui'].title}</h1>")
        gr.Markdown(CONFIG["ui"].description)

        with gr.Tabs():
            # Leaderboard Tab
            with gr.TabItem("📊 Leaderboard"):
                with gr.Row():
                    family_filter = gr.Dropdown(
                        choices=unique_values("family"),
                        label="Filter by Family",
                        multiselect=False,
                    )
                    quantization_filter = gr.Dropdown(
                        choices=unique_values("quantization_level"),
                        label="Filter by Quantization Level",
                    )

                filter_btn = gr.Button("Apply Filters", variant="primary")
                leaderboard_table = gr.DataFrame(
                    value=data_manager.leaderboard_data,
                    interactive=False,
                )

                filter_btn.click(
                    filter_leaderboard,
                    inputs=[family_filter, quantization_filter],
                    outputs=leaderboard_table,
                )

            # Model Responses Tab
            with gr.TabItem("🔍 Model Responses"):
                with gr.Row():
                    model_dropdown = gr.Dropdown(
                        choices=unique_values("model"),
                        label="Select Model",
                    )
                    query_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter search terms...",
                    )

                search_btn = gr.Button("Search", variant="primary")
                responses_table = gr.DataFrame()

                # search_responses receives (query, model) in that order.
                search_btn.click(
                    search_responses,
                    inputs=[query_input, model_dropdown],
                    outputs=responses_table,
                )

            # Section Results Tab
            with gr.TabItem("📈 Section Results"):
                # The plot function itself is passed (not its result), so
                # Gradio is the one that invokes it.
                gr.Plot(value=plot_section_results)
                gr.DataFrame(value=data_manager.section_results_data)

            # Submit Model Tab
            with gr.TabItem("➕ Submit Model"):
                gr.Markdown("### Submit Your Model for Evaluation")

                with gr.Group():
                    model_name = gr.Textbox(label="Model Name", placeholder="Enter unique model name")
                    base_model = gr.Textbox(label="Base Model", placeholder="Enter base model name")
                    revision = gr.Textbox(label="Revision", value="main")

                    with gr.Row():
                        precision = gr.Dropdown(
                            choices=CONFIG["model"].precision_options,
                            label="Precision",
                            value="float16",
                        )
                        weight_type = gr.Dropdown(
                            choices=CONFIG["model"].weight_types,
                            label="Weight Type",
                            value="Original",
                        )
                        model_type = gr.Dropdown(
                            choices=CONFIG["model"].model_types,
                            label="Model Type",
                            value="Transformer",
                        )

                submit_btn = gr.Button("Submit Model", variant="primary")
                submission_output = gr.Markdown()

                def handle_submission(*args):
                    """Validate the submitted form fields and return a status message.

                    NOTE(review): only validation happens here; nothing in this
                    function stores or forwards the submission.
                    """
                    is_valid, message = validate_model_submission(*args)
                    if not is_valid:
                        return f"❌ {message}"
                    return "✅ Model submitted successfully!"

                submit_btn.click(
                    handle_submission,
                    inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                    outputs=submission_output,
                )

    return app

def main():
    """Start the periodic data refresh and launch the Gradio app.

    Schedules ``data_manager.refresh_datasets`` every
    ``CONFIG["dataset"].refresh_interval`` seconds on a background scheduler,
    then builds the app and serves it on port 7860 on all interfaces.

    Raises:
        SystemExit: with exit code 1 if scheduling, app creation or launch
            raises an exception.
    """
    scheduler = None
    try:
        # Initialize scheduler for data refresh
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            data_manager.refresh_datasets,
            "interval",
            seconds=CONFIG["dataset"].refresh_interval,
        )
        scheduler.start()

        # Create and launch app
        app = create_app()
        app.queue(default_concurrency_limit=40).launch(
            inbrowser=True,
            server_name="0.0.0.0",  # Use 0.0.0.0 to listen on all interfaces
            server_port=7860,
            share=False,
            debug=False,
            show_error=True,
            max_threads=40,
        )
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error starting application: %s", e)
        # Stop the refresh job so it does not keep running while the process
        # is exiting; only a started scheduler can be shut down.
        if scheduler is not None and scheduler.running:
            scheduler.shutdown(wait=False)
        sys.exit(1)

# Run the application only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()