File size: 3,666 Bytes
977063a
edbe15e
 
 
977063a
edbe15e
 
 
 
 
 
 
 
 
 
 
42f819f
edbe15e
42f819f
 
5051da6
edbe15e
 
 
 
 
 
 
4babb59
edbe15e
 
470a9a5
acfff07
5051da6
edbe15e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154859a
edbe15e
470a9a5
 
 
154859a
 
 
470a9a5
154859a
470a9a5
 
5051da6
49c6a0b
5051da6
edbe15e
 
 
 
 
 
 
 
 
 
 
 
 
 
154859a
9ea5642
154859a
 
 
9ea5642
edbe15e
 
 
 
 
 
977063a
b9dc6d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import os
from evaluation_logic import run_evaluation
from eval.predict import PROMPT_FORMATTERS

# Preset name -> template text, materialized once at import time by
# instantiating each registered formatter and reading its PROMPT_TEMPLATE.
PROMPT_TEMPLATES = {
    preset: PROMPT_FORMATTERS[preset]().PROMPT_TEMPLATE
    for preset in ("duckdbinstgraniteshort", "duckdbinst")
}

def gradio_run_evaluation(inference_api, model_name, prompt_format, openrouter_token=None, custom_prompt=None):
    """Stream evaluation results into the Gradio output textbox.

    Args:
        inference_api: Backend identifier (currently only "openrouter").
        model_name: Model slug, e.g. "qwen/qwen-2.5-72b-instruct"; whitespace is stripped.
        prompt_format: Name of the selected prompt preset (or "custom").
        openrouter_token: API token, exported to the environment for the
            OpenRouter backend to pick up.
        custom_prompt: The (possibly edited) prompt template content.

    Yields:
        The accumulated, newline-joined log of results so far, so the UI
        updates incrementally as each result arrives.
    """
    # Export the token only when one was actually supplied; the previous
    # unconditional str() cast wrote the literal string "None" into the
    # environment when the field was empty, hiding the missing credential.
    if inference_api == "openrouter" and openrouter_token:
        os.environ["OPENROUTER_API_KEY"] = str(openrouter_token)

    # Pass both the format name and the template content to the evaluation;
    # accumulate results and re-yield the full log each time.
    output = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format, custom_prompt):
        output.append(result)
        yield "\n".join(output)

def update_token_visibility(api):
    """Show the OpenRouter token field only when the OpenRouter API is selected."""
    is_openrouter = api == "openrouter"
    return gr.update(visible=is_openrouter)

def update_prompt_template(prompt_format):
    """Return the preset template text for *prompt_format*, or "" if unknown.

    Unknown names (e.g. "custom") clear the textarea rather than raising.
    """
    return PROMPT_TEMPLATES.get(prompt_format, "")

# --- UI definition: declarative Gradio Blocks layout and event wiring ---
with gr.Blocks(gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    # Row 1: backend selection and credentials.
    with gr.Row():
        with gr.Column():
            inference_api = gr.Dropdown(
                label="Inference API",
                choices=['openrouter'],
                value="openrouter"
            )

            # Visible by default since "openrouter" is the default API;
            # visibility is toggled by update_token_visibility below.
            openrouter_token = gr.Textbox(
                label="OpenRouter API Token",
                placeholder="Enter your OpenRouter API token",
                type="password",
                visible=True
            )

            model_name = gr.Textbox(
                label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)"
            )

            gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")

    # Row 2: prompt preset selection and the editable template content.
    with gr.Row():
        with gr.Column():
            prompt_format = gr.Dropdown(
                label="Prompt Format",
                choices=['duckdbinst', 'duckdbinstgraniteshort', 'custom'],
                value="duckdbinstgraniteshort"
            )

            # Pre-filled with the default preset's template text.
            custom_prompt = gr.TextArea(
                label="Prompt Template Content",
                placeholder="Enter your custom prompt template here or select a preset format above.",
                lines=10,
                value=PROMPT_TEMPLATES['duckdbinstgraniteshort']
            )

    # Clickable example configurations (api, model, preset).
    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst"],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort"],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst"],
        ],
        inputs=[inference_api, model_name, prompt_format],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    # Show/hide the token field when the API dropdown changes.
    inference_api.change(
        fn=update_token_visibility,
        inputs=[inference_api],
        outputs=[openrouter_token]
    )

    # Load the preset's template text into the textarea when a preset is picked.
    # NOTE(review): this programmatic update may itself fire custom_prompt.change
    # below, flipping the dropdown to "custom" — confirm against Gradio's
    # event-trigger semantics for programmatic component updates.
    prompt_format.change(
        fn=update_prompt_template,
        inputs=[prompt_format],
        outputs=[custom_prompt]
    )

    # Any edit to the template switches the preset dropdown to "custom".
    # NOTE(review): with inputs=None Gradio typically calls fn with no
    # arguments, but this lambda declares one parameter — verify this does
    # not raise a TypeError at runtime in the deployed Gradio version.
    custom_prompt.change(
        fn=lambda _: "custom",
        inputs=None,
        outputs=prompt_format
    )

    # Kick off the streaming evaluation; gradio_run_evaluation is a generator,
    # so the output textbox updates incrementally.
    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
        outputs=output
    )

# queue() enables generator/streaming outputs for the click handler above.
demo.queue().launch()