# DataEngEval / config/app.yaml
# Last commit: a026fe5 by uparekh01151
#   Add dataset visibility configuration - only show nyc_taxi_small by default
# Application Configuration
# Top-level application metadata plus the server bind settings.
# NOTE(review): indentation was missing in the reviewed copy, so the nesting
# below is reconstructed. `server` is assumed to be a child of `app` because it
# has no section header comment of its own — confirm against the config loader.
app:
  title: "DataEngEval"
  description: "A config-driven evaluation platform for English → SQL tasks across Presto, BigQuery, and Snowflake."
  theme: "soft"
  server:
    # "0.0.0.0" binds on all network interfaces.
    host: "0.0.0.0"
    port: 7860
    # NOTE(review): presumably enables a public share link — confirm this is
    # intended for the deployment target.
    share: true
# Leaderboard Configuration
# Storage location, stored column names, and display settings for the
# leaderboard.
# NOTE(review): nesting is reconstructed (indentation was missing in the
# reviewed copy); `display` is assumed to belong to `leaderboard` — confirm
# against the config loader.
leaderboard:
  # Path of the leaderboard file (Parquet, per the extension).
  path: "tasks/leaderboard.parquet"
  # Column names recorded for each leaderboard row.
  columns:
    - "timestamp"
    - "dataset_name"
    - "case_id"
    - "dialect"
    - "model_name"
    - "question"
    - "reference_sql"
    - "candidate_sql"
    - "correctness_exact"
    - "result_match_f1"
    - "exec_success"
    - "latency_ms"
    - "readability"
    - "dialect_ok"
    - "composite_score"
  display:
    # Number of rows shown; the UI label "Global Leaderboard (Top 50)" repeats
    # this value as text, so keep the two in sync when changing it.
    top_results: 50
    # Human-readable headers for the results table.
    results_table_headers:
      - "Rank"
      - "Model"
      - "Composite Score"
      - "Correctness"
      - "Result F1"
      - "Exec Success"
      - "Latency"
      - "Dataset"
      - "Case ID"
      - "Question"
      - "Reference SQL"
      - "Generated SQL"
      - "Dialect OK"
# SQL dialects offered by the application.
dialects:
  - "presto"
  - "bigquery"
  - "snowflake"
# Use cases offered by the application.
use_cases:
  - "sql_generation"
  - "code_generation"
  - "documentation"
# Visible Datasets: controls which datasets appear in the UI.
# Entries appear to follow "<use_case>/<dataset_name>"; uncomment a line to
# make that dataset visible again.
visible_datasets:
  - "sql_generation/nyc_taxi_small"
  # - "code_generation/python_algorithms"  # Disabled
  # - "code_generation/go_algorithms"  # Disabled
  # - "documentation/technical_docs"  # Disabled
  # - "documentation/api_documentation"  # Disabled
# Programming languages available for the code generation use case.
languages:
  - "python"
  - "go"
  - "javascript"
  - "java"
# Output formats available for documentation.
doc_formats:
  - "markdown"
  - "html"
  - "json"
  - "yaml"
# Prompt Template Configuration
# `template_path` is the directory holding prompt templates;
# `fallback_template` is an inline prompt kept as a literal block scalar so its
# line breaks are preserved.
# NOTE(review): {dialect} uses single braces while {{schema}} and {{question}}
# use double braces — presumably a two-stage substitution; confirm against the
# code that renders this template.
# NOTE(review): any blank lines inside the template were not visible in the
# reviewed copy and are therefore not reproduced here.
prompts:
  template_path: "prompts/"
  fallback_template: |
    You are an expert SQL developer specializing in {dialect} SQL dialect.
    Given the following database schema and a natural language question, generate a correct SQL query in {dialect} syntax.
    Database Schema:
    {{schema}}
    Question: {{question}}
    Requirements:
    - Use proper {dialect} SQL syntax
    - Ensure the query is syntactically correct
    - Return only the SQL query, no explanations
    SQL Query:
# Environment Configuration
# The `*_env` values are names of environment variables, not the values
# themselves; `mock_mode_default` is the boolean default for mock mode.
environment:
  mock_mode_env: "MOCK_MODE"
  hf_token_env: "HF_TOKEN"
  mock_mode_default: false
# UI Configuration
# Labels and options for tabs, buttons, input widgets and output widgets.
# NOTE(review): nesting is reconstructed (indentation was missing in the
# reviewed copy) — confirm against the code that reads this section.
ui:
  # Each tab has an internal name and a displayed label.
  tabs:
    - name: "Evaluate"
      label: "Evaluate"
    - name: "Leaderboard"
      label: "Leaderboard"
    - name: "Info"
      label: "Info"
  buttons:
    refresh:
      text: "Refresh Leaderboard"
      variant: "secondary"
      size: "sm"
    run_evaluation:
      text: "Run Evaluation"
      variant: "primary"
  inputs:
    dataset:
      label: "Dataset"
    dialect:
      label: "SQL Dialect"
      # One of the values listed under the top-level `dialects` key.
      default: "presto"
    case:
      label: "Test Case"
    models:
      label: "Models to Evaluate"
  outputs:
    status:
      label: "Status"
    results:
      label: "Results"
      # NOTE(review): "Exec Success" precedes "Result F1" here, the reverse of
      # leaderboard `results_table_headers` — confirm the order matches the
      # rows this table is filled with.
      headers:
        - "Model"
        - "Composite Score"
        - "Correctness"
        - "Exec Success"
        - "Result F1"
        - "Latency"
    detailed:
      label: "Detailed Results"
    leaderboard:
      # The "Top 50" text repeats the leaderboard `top_results` value; keep
      # the two in sync.
      label: "Global Leaderboard (Top 50)"