# qLeaderboard-aBase4Community / evaluation_queue.py
"""
Updated create_model_submission_ui function that properly displays benchmark names in dropdown.
Replace this function in your evaluation_queue.py file.
"""
def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
"""Create the model submission UI components.
Args:
evaluation_queue: Evaluation queue instance
auth_manager: Authentication manager instance
db_manager: Database manager instance
Returns:
gr.Blocks: Gradio Blocks component with model submission UI
"""
with gr.Blocks() as submission_ui:
with gr.Tab("Submit Model"):
gr.Markdown(f"""
### Model Size Restrictions
Models must fit within {evaluation_queue.memory_limit_gb}GB of RAM for evaluation.
Large models will be rejected to ensure all evaluations can complete successfully.
""", elem_classes=["info-text"])
with gr.Row():
with gr.Column(scale=2):
model_id_input = gr.Textbox(
placeholder="HuggingFace model ID (e.g., 'gpt2', 'facebook/opt-350m')",
label="Model ID"
)
check_size_button = gr.Button("Check Model Size")
size_check_result = gr.Markdown("")
model_name_input = gr.Textbox(
placeholder="Display name for your model",
label="Model Name"
)
model_description_input = gr.Textbox(
placeholder="Brief description of your model",
label="Description",
lines=3
)
model_parameters_input = gr.Number(
label="Number of Parameters (billions)",
precision=2
)
with gr.Column(scale=1):
model_tag_input = gr.Dropdown(
choices=evaluation_queue.model_tags,
label="Model Tag",
info="Select one category that best describes your model"
)
                    # Benchmark dropdown: Gradio choice tuples are (display_name, value),
                    # so the placeholder shows "Loading benchmarks..." and carries the
                    # sentinel value "none" until real benchmarks are loaded.
                    benchmark_dropdown = gr.Dropdown(
                        label="Benchmark",
                        info="Select a benchmark to evaluate your model on",
                        choices=[("Loading benchmarks...", "none")],
                        value=None
                    )
refresh_benchmarks_button = gr.Button("Refresh Benchmarks")
submit_model_button = gr.Button("Submit for Evaluation")
submission_status = gr.Markdown("")
with gr.Tab("Evaluation Queue"):
refresh_queue_button = gr.Button("Refresh Queue")
with gr.Row():
with gr.Column(scale=1):
queue_stats = gr.JSON(
label="Queue Statistics"
)
with gr.Column(scale=2):
queue_status = gr.Dataframe(
headers=["ID", "Model", "Benchmark", "Status", "Submitted"],
label="Recent Evaluations"
)
with gr.Row(visible=True) as progress_container:
with gr.Column():
current_eval_info = gr.Markdown("No evaluation currently running")
# Use a simple text display for progress instead of Progress component
progress_display = gr.Markdown("Progress: 0%")
# Event handlers
def check_model_size_handler(model_id):
if not model_id:
return "Please enter a HuggingFace model ID."
try:
will_fit, message = evaluation_queue.check_model_size(model_id)
if will_fit:
return f"✅ {message}"
else:
return f"❌ {message}"
except Exception as e:
return f"Error checking model size: {str(e)}"
def refresh_benchmarks_handler():
benchmarks = db_manager.get_benchmarks()
            # Format for the dropdown: Gradio choice tuples are (display_name, value),
            # so show the benchmark name and pass the benchmark id as the value.
            choices = []
            for b in benchmarks:
                choices.append((b["name"], str(b["id"])))
            if not choices:
                choices = [("No benchmarks available - add some first", "none")]
            return gr.update(choices=choices)
def submit_model_handler(model_id, model_name, model_description, model_parameters, model_tag, benchmark_id, request: gr.Request):
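            # Gradio injects `request: gr.Request` automatically because of the type
            # annotation, so it is not listed in the click() inputs below.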
# Check if user is logged in
user = auth_manager.check_login(request)
if not user:
return "Please log in to submit a model."
if not model_id or not model_name or not model_tag or not benchmark_id:
return "Please fill in all required fields."
if benchmark_id == "none":
return "Please select a valid benchmark."
try:
# Check if model will fit in RAM
will_fit, size_message = evaluation_queue.check_model_size(model_id)
if not will_fit:
return f"❌ {size_message}"
# Add model to database
model_db_id = db_manager.add_model(
name=model_name,
hf_model_id=model_id,
user_id=user["id"],
tag=model_tag,
parameters=str(model_parameters) if model_parameters else None,
description=model_description
)
if not model_db_id:
return "Failed to add model to database."
# Submit for evaluation
eval_id, message = evaluation_queue.submit_evaluation(
model_id=model_db_id,
benchmark_id=benchmark_id,
user_id=user["id"]
)
if eval_id:
return f"✅ Model submitted successfully. {size_message}\nEvaluation ID: {eval_id}"
else:
return message
except Exception as e:
return f"Error submitting model: {str(e)}"
def refresh_queue_handler():
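            # Returns four values in the order expected by the wired outputs:
            # [queue_stats, queue_status, current_eval_info, progress_display].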
# Get queue statistics
stats = evaluation_queue.get_queue_status()
# Get recent evaluations (all statuses, limited to 20)
evals = db_manager.get_evaluation_results(limit=20)
# Format for dataframe
eval_data = []
            # Use `ev` to avoid shadowing the built-in `eval`.
            for ev in evals:
                eval_data.append([
                    ev["id"],
                    ev["model_name"],
                    ev["benchmark_name"],
                    ev["status"],
                    ev["submitted_at"]
                ])
# Also update progress display
current_eval, progress = evaluation_queue.get_current_progress()
if current_eval:
model_info = db_manager.get_model(current_eval['model_id'])
benchmark_info = db_manager.get_benchmark(current_eval['benchmark_id'])
if model_info and benchmark_info:
eval_info = f"**Currently Evaluating:** {model_info['name']} on {benchmark_info['name']}"
progress_text = f"Progress: {progress}%"
return stats, eval_data, eval_info, progress_text
return stats, eval_data, "No evaluation currently running", "Progress: 0%"
# Connect event handlers
check_size_button.click(
fn=check_model_size_handler,
inputs=[model_id_input],
outputs=[size_check_result]
)
refresh_benchmarks_button.click(
fn=refresh_benchmarks_handler,
inputs=[],
outputs=[benchmark_dropdown]
)
submit_model_button.click(
fn=submit_model_handler,
inputs=[
model_id_input,
model_name_input,
model_description_input,
model_parameters_input,
model_tag_input,
benchmark_dropdown
],
outputs=[submission_status]
)
refresh_queue_button.click(
fn=refresh_queue_handler,
inputs=[],
outputs=[queue_stats, queue_status, current_eval_info, progress_display]
)
# Initialize on load
submission_ui.load(
fn=refresh_benchmarks_handler,
inputs=[],
outputs=[benchmark_dropdown]
)
submission_ui.load(
fn=refresh_queue_handler,
inputs=[],
outputs=[queue_stats, queue_status, current_eval_info, progress_display]
)
return submission_ui
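

# ---------------------------------------------------------------------------
# Minimal local smoke test (sketch).
# The real EvaluationQueue, AuthManager and DatabaseManager come from the rest
# of this project; the stand-ins below are hypothetical stubs that implement
# only the attributes and methods this UI touches, so the layout can be
# launched and clicked through without the full backend.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    class _StubQueue:
        memory_limit_gb = 14
        model_tags = ["Base", "Instruct", "Chat"]

        def check_model_size(self, model_id):
            return True, f"{model_id} is assumed to fit (stub)."

        def submit_evaluation(self, model_id, benchmark_id, user_id):
            return 1, "Queued (stub)."

        def get_queue_status(self):
            return {"pending": 0, "running": 0, "completed": 0}

        def get_current_progress(self):
            # No evaluation running in the stub.
            return None, 0

    class _StubAuth:
        def check_login(self, request):
            # Pretend every request belongs to a logged-in demo user.
            return {"id": 1, "username": "demo"}

    class _StubDB:
        def get_benchmarks(self):
            return [{"id": 1, "name": "Example benchmark"}]

        def add_model(self, name, hf_model_id, user_id, tag, parameters, description):
            return 1

        def get_evaluation_results(self, limit=20):
            return []

        def get_model(self, model_id):
            return {"name": "demo-model"}

        def get_benchmark(self, benchmark_id):
            return {"name": "Example benchmark"}

    demo = create_model_submission_ui(_StubQueue(), _StubAuth(), _StubDB())
    demo.launch()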