---
# Application Configuration
# Top-level application metadata plus the address the server binds to.
app:
  title: "DataEngEval"
  description: "A config-driven evaluation platform for English → SQL tasks across Presto, BigQuery, and Snowflake."
  theme: "soft"
  # NOTE(review): nesting reconstructed from a flattened copy; `server` is
  # assumed to belong under `app` — confirm against the config loader.
  server:
    # "0.0.0.0" is quoted so it stays a string.
    host: "0.0.0.0"
    port: 7860
    share: true
# Leaderboard Configuration
# Where leaderboard rows are stored, which columns each row carries, and how
# the leaderboard table is presented.
leaderboard:
  path: "tasks/leaderboard.parquet"
  # Column names of a stored leaderboard row, in order.
  columns:
    - "timestamp"
    - "dataset_name"
    - "case_id"
    - "dialect"
    - "model_name"
    - "question"
    - "reference_sql"
    - "candidate_sql"
    - "correctness_exact"
    - "result_match_f1"
    - "exec_success"
    - "latency_ms"
    - "readability"
    - "dialect_ok"
    - "composite_score"
  # NOTE(review): nesting reconstructed from a flattened copy; `display` is
  # assumed to sit under `leaderboard`, and `results_table_headers` under
  # `display` — confirm against the config loader.
  display:
    top_results: 50
    # Human-readable header labels for the rendered leaderboard table.
    results_table_headers:
      - "Rank"
      - "Model"
      - "Composite Score"
      - "Correctness"
      - "Result F1"
      - "Exec Success"
      - "Latency"
      - "Dataset"
      - "Case ID"
      - "Question"
      - "Reference SQL"
      - "Generated SQL"
      - "Dialect OK"
# Available SQL Dialects
# Dialect identifiers offered by the application.
dialects:
  - "presto"
  - "bigquery"
  - "snowflake"
# Available Use Cases
# Use-case identifiers; these match the prefixes used in the dataset paths
# listed under `visible_datasets`.
use_cases:
  - "sql_generation"
  - "code_generation"
  - "documentation"
# Visible Datasets (control which datasets appear in UI)
# Entries are "<use_case>/<dataset>" paths. Commented-out entries are disabled;
# uncomment a line to make that dataset visible again.
visible_datasets:
  - "sql_generation/nyc_taxi_small"
  # - "code_generation/python_algorithms"  # Disabled
  # - "code_generation/go_algorithms"  # Disabled
  # - "documentation/technical_docs"  # Disabled
  # - "documentation/api_documentation"  # Disabled
# Available Programming Languages (for code generation)
languages:
  - "python"
  - "go"
  - "javascript"
  - "java"
# Available Documentation Formats
doc_formats:
  - "markdown"
  - "html"
  - "json"
  - "yaml"
# Prompt Template Configuration
prompts:
  template_path: "prompts/"
  # Literal block scalar (`|`): line breaks in the template are preserved.
  # The template mixes single-brace `{dialect}` with double-brace `{{schema}}`
  # and `{{question}}` placeholders.
  # NOTE(review): presumably the template is formatted in two passes (dialect
  # first, then schema/question) — confirm against the prompt loader before
  # normalizing the braces.
  # NOTE(review): reconstructed from a flattened copy; any blank lines that
  # originally separated the template's sections could not be recovered.
  fallback_template: |
    You are an expert SQL developer specializing in {dialect} SQL dialect.
    Given the following database schema and a natural language question, generate a correct SQL query in {dialect} syntax.
    Database Schema:
    {{schema}}
    Question: {{question}}
    Requirements:
    - Use proper {dialect} SQL syntax
    - Ensure the query is syntactically correct
    - Return only the SQL query, no explanations
    SQL Query:
# Environment Configuration
# NOTE(review): the `*_env` values look like names of environment variables
# rather than their values — confirm against the code that reads this section.
environment:
  mock_mode_env: "MOCK_MODE"
  hf_token_env: "HF_TOKEN"
  # A real YAML boolean, not the string "false".
  mock_mode_default: false
# UI Configuration
# Labels and options for the tabs, buttons, inputs, and outputs of the UI.
# NOTE(review): nesting reconstructed from a flattened copy; the grouping of
# keys under `tabs`, `buttons`, `inputs`, and `outputs` should be confirmed
# against the code that reads this section.
ui:
  tabs:
    - name: "Evaluate"
      label: "Evaluate"
    - name: "Leaderboard"
      label: "Leaderboard"
    - name: "Info"
      label: "Info"
  buttons:
    refresh:
      text: "Refresh Leaderboard"
      variant: "secondary"
      size: "sm"
    run_evaluation:
      text: "Run Evaluation"
      variant: "primary"
  inputs:
    dataset:
      label: "Dataset"
    dialect:
      label: "SQL Dialect"
      # "presto" is one of the entries in the top-level `dialects` list.
      default: "presto"
    case:
      label: "Test Case"
    models:
      label: "Models to Evaluate"
  outputs:
    status:
      label: "Status"
    results:
      label: "Results"
      headers:
        - "Model"
        - "Composite Score"
        - "Correctness"
        - "Exec Success"
        - "Result F1"
        - "Latency"
    detailed:
      label: "Detailed Results"
    leaderboard:
      # The "50" in this label duplicates the `top_results` value in the
      # leaderboard section; update both together.
      label: "Global Leaderboard (Top 50)"