File size: 1,496 Bytes
977063a
b9dc6d6
977063a
3a1fd8c
42f819f
3a1fd8c
42f819f
 
5051da6
470a9a5
acfff07
5051da6
3a1fd8c
 
470a9a5
3a1fd8c
 
f9d0ccd
470a9a5
 
b9dc6d6
42f819f
7a91b08
b9dc6d6
 
470a9a5
 
 
 
 
 
 
 
 
5051da6
49c6a0b
5051da6
3a1fd8c
977063a
b9dc6d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
from evaluation_logic import run_evaluation, AVAILABLE_PROMPT_FORMATS

def gradio_run_evaluation(inference_api, model_name, prompt_format):
    """Stream the accumulated evaluation output as it is produced.

    Iterates over ``run_evaluation`` and, after every item it produces,
    yields all items received so far joined by newlines, so each yielded
    value is the full transcript up to that point.

    Args:
        inference_api: Forwarded unchanged to ``run_evaluation``.
        model_name: Converted to ``str`` and stripped of surrounding
            whitespace before being forwarded.
        prompt_format: Forwarded unchanged to ``run_evaluation``.

    Yields:
        The newline-joined text of every result received so far.
    """
    cleaned_model = str(model_name).strip()
    transcript = []
    for message in run_evaluation(inference_api, cleaned_model, prompt_format):
        transcript.append(message)
        # Re-emit the whole transcript, not only the newest message.
        yield "\n".join(transcript)

# Options offered by the "Prompt Format" dropdown. They are spelled out here
# instead of using the imported AVAILABLE_PROMPT_FORMATS.
_PROMPT_FORMAT_CHOICES = ["duckdbinst", "duckdbinstgraniteshort"]

# Example rows; columns follow the order of the `inputs` list given to
# gr.Examples below: inference API, model name, prompt format.
_EXAMPLE_ROWS = [
    ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst"],
    ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort"],
    ["openrouter", "mistralai/mistral-nemo", "duckdbinst"],
]

# The Soft theme is handed to gr.Blocks as its first positional argument.
with gr.Blocks(gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    # --- Evaluation inputs ---
    inference_api = gr.Dropdown(
        choices=["openrouter"],
        value="openrouter",
        label="Inference API",
    )
    model_name = gr.Textbox(
        label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)",
    )
    gr.Markdown(
        "[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)"
    )
    prompt_format = gr.Dropdown(
        choices=_PROMPT_FORMAT_CHOICES,
        value="duckdbinstgraniteshort",
        label="Prompt Format",
    )

    # Predefined input combinations bound to the three input components.
    gr.Examples(
        examples=_EXAMPLE_ROWS,
        inputs=[inference_api, model_name, prompt_format],
    )

    # --- Trigger and result area ---
    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    # gradio_run_evaluation is a generator; each value it yields is sent to
    # the output textbox.
    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format],
        outputs=output,
    )

# Enable the queue, then start the app.
demo.queue().launch()