Spaces:

simone-papicchio
/

qatch-demo

Running

App Files Files Community

simone-papicchio

franceth committed on Mar 21

Commit

aff05a7

verified ·

1 Parent(s): ac204fe

More stable version. Link all acc, but still miss prediction (#6)

Browse files

- More stable version. Link all acc, but still miss prediction (696b8fe88b9890ac834f2892408dc569ef849c2f)

Co-authored-by: Francesco Giannuzzo <[email protected]>

Files changed (5) hide show

app.py +828 -559
style.css +6 -1
test_results.csv +101 -0
utilities.py +8 -3
utils_get_db_tables_info.py +94 -0

app.py CHANGED Viewed

@@ -1,560 +1,829 @@
-import os
-# https://discuss.huggingface.co/t/issues-with-sadtalker-zerogpu-spaces-inquiry-about-community-grant/110625/10
-if os.environ.get("SPACES_ZERO_GPU") is not None:
-    import spaces
-else:
-    class spaces:
-        @staticmethod
-        def GPU(func):
-            def wrapper(*args, **kwargs):
-                return func(*args, **kwargs)
-            return wrapper
-import gradio as gr
-import pandas as pd
-import os
-from qatch.connectors.sqlite_connector import SqliteConnector
-from qatch.generate_dataset.orchestrator_generator import OrchestratorGenerator
-from qatch.evaluate_dataset.orchestrator_evaluator import OrchestratorEvaluator
-import utilities as us
-import plotly.express as px
-import plotly.graph_objects as go
-@spaces.GPU
-def model_prediction():
-  pass
-with open('style.css', 'r') as file:
-    css = file.read()
-# DataFrame di default
-df_default = pd.DataFrame({
-    'Name': ['Alice', 'Bob', 'Charlie'],
-    'Age': [25, 30, 35],
-    'City': ['New York', 'Los Angeles', 'Chicago']
-})
-models_path = "models.csv"
-# Variabile globale per tenere traccia dei dati correnti
-df_current = df_default.copy()
-input_data = {
-    'input_method': "",
-    'data_path': "",
-    'db_name': "",
-    'data': {
-        'data_frames': {},    # dictionary of dataframes
-        'db': None             # SQLITE3 database object
-    },
-    'models': []
-}
-def load_data(file, path, use_default):
-    """Carica i dati da un file, un percorso o usa il DataFrame di default."""
-    global df_current
-    if use_default:
-        input_data["input_method"] = 'default'
-        input_data["data_path"] = os.path.join(".", "data", "datainterface", "mytable.sqlite")
-        input_data["db_name"] = os.path.splitext(os.path.basename(input_data["data_path"]))[0]
-        input_data["data"]['data_frames'] = {'MyTable': df_current}
-        #TODO assegna il db a input_data["data"]['db']
-        df_current = df_default.copy()  # Ripristina i dati di default
-        return input_data["data"]['data_frames']
-    selected_inputs = sum([file is not None, bool(path), use_default])
-    if selected_inputs > 1:
-        return 'Errore: Selezionare solo un metodo di input alla volta.'
-    if file is not None:
-        try:
-            input_data["input_method"] = 'uploaded_file'
-            input_data["db_name"] = os.path.splitext(os.path.basename(file))[0]
-            input_data["data_path"] = os.path.join(".", "data", f"data_interface{input_data['db_name']}.sqlite")
-            input_data["data"] = us.load_data(input_data["data_path"], input_data["db_name"])
-            df_current = input_data["data"]['data_frames'].get('MyTable', df_default)  # Carica il DataFrame
-            print(df_current)
-            print(input_data["data"])
-            if( input_data["data"]['data_frames'] and not input_data["data"]['db']):
-                table2primary_key = {}
-                print("ok")
-                for table_name, df in input_data["data"]['data_frames'].items():
-                    # Assign primary keys for each table
-                    table2primary_key[table_name] = 'id'
-                print("ok2")
-                input_data["data"]["db"] = SqliteConnector(
-                    relative_db_path=input_data["data_path"],
-                    db_name=input_data["db_name"],
-                    tables= input_data["data"]['data_frames'],
-                    table2primary_key=table2primary_key
-                )
-                print(input_data["data"]["db"])
-            return input_data["data"]['data_frames']
-        except Exception as e:
-            return f'Errore nel caricamento del file: {e}'
-    if path:
-        if not os.path.exists(path):
-            return 'Errore: Il percorso specificato non esiste.'
-        try:
-            input_data["input_method"] = 'uploaded_file'
-            input_data["data_path"] = path
-            input_data["db_name"] = os.path.splitext(os.path.basename(path))[0]
-            input_data["data"] = us.load_data(input_data["data_path"], input_data["db_name"])
-            df_current = input_data["data"]['data_frames'].get('MyTable', df_default)  # Carica il DataFrame
-            return input_data["data"]['data_frames']
-        except Exception as e:
-            return f'Errore nel caricamento del file dal percorso: {e}'
-    return input_data["data"]['data_frames']
-def preview_default(use_default):
-    """Mostra il DataFrame di default se il checkbox è selezionato."""
-    if use_default:
-        return df_default  # Mostra il DataFrame di default
-    return df_current  # Mostra il DataFrame corrente, che potrebbe essere stato modificato
-def update_df(new_df):
-    """Aggiorna il DataFrame corrente."""
-    global df_current  # Usa la variabile globale per aggiornarla
-    df_current = new_df
-    return df_current
-def open_accordion(target):
-    # Apre uno e chiude l'altro
-    if target == "reset":
-        return gr.update(open=True), gr.update(open=False, visible=False), gr.update(open=False, visible=False), gr.update(open=False, visible=False), gr.update(open=False, visible=False)
-    elif target == "model_selection":
-        return gr.update(open=False), gr.update(open=False), gr.update(open=True, visible=True), gr.update(open=False), gr.update(open=False)
-# Interfaccia Gradio
-interface = gr.Blocks(theme='d8ahazard/rd_blue', css_paths='style.css')
-with interface:
-    gr.Markdown("# QATCH")
-    data_state = gr.State(None)  # Memorizza i dati caricati
-    upload_acc = gr.Accordion("Upload your data section", open=True, visible=True)
-    select_table_acc = gr.Accordion("Select tables", open=False, visible=False)
-    select_model_acc = gr.Accordion("Select models", open=False, visible=False)
-    qatch_acc = gr.Accordion("QATCH execution", open=False, visible=False)
-    metrics_acc = gr.Accordion("Metrics", open=False, visible=False)
-    #################################
-    #  PARTE DI INSERIMENTO DEL DB  #
-    #################################
-    with upload_acc:
-        gr.Markdown("## Caricamento dei Dati")
-        file_input = gr.File(label="Trascina e rilascia un file", file_types=[".csv", ".xlsx", ".sqlite"])
-        path_input = gr.Textbox(label="Oppure inserisci il percorso locale del file")
-        with gr.Row():
-            default_checkbox = gr.Checkbox(label="Usa DataFrame di default")
-        preview_output = gr.DataFrame(interactive=True, visible=True, value=df_default)
-        submit_button = gr.Button("Carica Dati", interactive=False)  # Disabilitato di default
-        output = gr.JSON(visible=False)  # Output dizionario
-        # Funzione per abilitare il bottone se sono presenti dati da caricare
-        def enable_submit(file, path, use_default):
-            return gr.update(interactive=bool(file or path or use_default))
-        # Abilita il bottone quando i campi di input sono valorizzati
-        file_input.change(fn=enable_submit, inputs=[file_input, path_input, default_checkbox], outputs=[submit_button])
-        path_input.change(fn=enable_submit, inputs=[file_input, path_input, default_checkbox], outputs=[submit_button])
-        default_checkbox.change(fn=enable_submit, inputs=[file_input, path_input, default_checkbox], outputs=[submit_button])
-        # Mostra l'anteprima del DataFrame di default quando il checkbox è selezionato
-        default_checkbox.change(fn=preview_default, inputs=[default_checkbox], outputs=[preview_output])
-        preview_output.change(fn=update_df, inputs=[preview_output], outputs=[preview_output])
-        def handle_output(file, path, use_default):
-            """Gestisce l'output quando si preme il bottone 'Carica Dati'."""
-            result = load_data(file, path, use_default)
-            if isinstance(result, dict):  # Se result è un dizionario di DataFrame
-                if len(result) == 1:  # Se c'è solo una tabella
-                    return (
-                        gr.update(visible=False),  # Nasconde l'output JSON
-                        result,  # Salva lo stato dei dati
-                        gr.update(visible=False),  # Nasconde la selezione tabella
-                        result,  # Mantiene lo stato dei dati
-                        gr.update(interactive=False),  # Disabilita il pulsante di submit
-                        gr.update(visible=True, open=True),  # Passa direttamente a select_model_acc
-                        gr.update(visible=True, open=False)
-                    )
-                else:
-                    return (
-                        gr.update(visible=False),
-                        result,
-                        gr.update(open=True, visible=True),
-                        result,
-                        gr.update(interactive=False),
-                        gr.update(visible=False),  # Mantiene il comportamento attuale
-                        gr.update(visible=True, open=True)
-                    )
-            else:
-                return (
-                    gr.update(visible=False),
-                    None,
-                    gr.update(open=False, visible=True),
-                    None,
-                    gr.update(interactive=True),
-                    gr.update(visible=False),
-                    gr.update(visible=True, open=True)
-                )
-        submit_button.click(
-            fn=handle_output,
-            inputs=[file_input, path_input, default_checkbox],
-            outputs=[output, output, select_table_acc, data_state, submit_button, select_model_acc, upload_acc]
-        )
-    ######################################
-    #  PARTE DI SELEZIONE DELLE TABELLE  #
-    ######################################
-    with select_table_acc:
-        table_selector = gr.CheckboxGroup(choices=[], label="Seleziona le tabelle da visualizzare", value=[])
-        table_outputs = [gr.DataFrame(label=f"Tabella {i+1}", interactive=True, visible=False) for i in range(5)]
-        selected_table_names = gr.Textbox(label="Tabelle selezionate", visible=False, interactive=False)
-        # Bottone di selezione modelli (inizialmente disabilitato)
-        open_model_selection = gr.Button("Choose your models", interactive=False)
-        def update_table_list(data):
-            """Aggiorna dinamicamente la lista delle tabelle disponibili."""
-            if isinstance(data, dict) and data:
-                table_names = list(data.keys())  # Ritorna solo i nomi delle tabelle
-                return gr.update(choices=table_names, value=[])  # Reset delle selezioni
-            return gr.update(choices=[], value=[])
-        def show_selected_tables(data, selected_tables):
-            """Mostra solo le tabelle selezionate dall'utente e abilita il bottone."""
-            updates = []
-            if isinstance(data, dict) and data:
-                available_tables = list(data.keys())  # Nomi effettivamente disponibili
-                selected_tables = [t for t in selected_tables if t in available_tables]  # Filtra selezioni valide
-                tables = {name: data[name] for name in selected_tables}  # Filtra i DataFrame
-                for i, (name, df) in enumerate(tables.items()):
-                    updates.append(gr.update(value=df, label=f"Tabella: {name}", visible=True))
-                # Se ci sono meno di 5 tabelle, nascondi gli altri DataFrame
-                for _ in range(len(tables), 5):
-                    updates.append(gr.update(visible=False))
-            else:
-                updates = [gr.update(value=pd.DataFrame(), visible=False) for _ in range(5)]
-            # Abilitare/disabilitare il bottone in base alle selezioni
-            button_state = bool(selected_tables)  # True se almeno una tabella è selezionata, False altrimenti
-            updates.append(gr.update(interactive=button_state))  # Aggiorna stato bottone
-            return updates
-        def show_selected_table_names(selected_tables):
-            """Mostra i nomi delle tabelle selezionate quando si preme il bottone."""
-            if selected_tables:
-                return gr.update(value=", ".join(selected_tables), visible=False)
-            return gr.update(value="", visible=False)
-        # Aggiorna automaticamente la lista delle checkbox quando `data_state` cambia
-        data_state.change(fn=update_table_list, inputs=[data_state], outputs=[table_selector])
-        # Aggiorna le tabelle visibili e lo stato del bottone in base alle selezioni dell'utente
-        table_selector.change(fn=show_selected_tables, inputs=[data_state, table_selector], outputs=table_outputs + [open_model_selection])
-        # Mostra la lista delle tabelle selezionate quando si preme "Choose your models"
-        open_model_selection.click(fn=show_selected_table_names, inputs=[table_selector], outputs=[selected_table_names])
-        open_model_selection.click(open_accordion, inputs=gr.State("model_selection"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc])
-    ####################################
-    #  PARTE DI SELEZIONE DEL MODELLO  #
-    ####################################
-    with select_model_acc:
-        gr.Markdown("**Model Selection**")
-        # Supponiamo che `us.read_models_csv` restituisca anche il percorso dell'immagine
-        model_list_dict = us.read_models_csv(models_path)
-        model_list = [model["name"] for model in model_list_dict]
-        model_images = [model["image_path"] for model in model_list_dict]
-        # Creazione dinamica di checkbox con immagini
-        model_checkboxes = []
-        for model, image_path in zip(model_list, model_images):
-            with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Image(image_path, show_label=False)
-                with gr.Column(scale=2):
-                    model_checkboxes.append(gr.Checkbox(label=model, value=False))
-        selected_models_output = gr.JSON(visible = False)
-        # Funzione per ottenere i modelli selezionati
-        def get_selected_models(*model_selections):
-            selected_models = [model for model, selected in zip(model_list, model_selections) if selected]
-            input_data['models'] = selected_models
-            button_state = bool(selected_models)  # True se almeno un modello è selezionato, False altrimenti
-            return selected_models, gr.update(open=True, visible=True), gr.update(interactive=button_state)
-        # Bottone di submit (inizialmente disabilitato)
-        submit_models_button = gr.Button("Submit Models", interactive=False)
-        # Collegamento dei checkbox agli eventi di selezione
-        for checkbox in model_checkboxes:
-            checkbox.change(
-                fn=get_selected_models,
-                inputs=model_checkboxes,
-                outputs=[selected_models_output, select_model_acc, submit_models_button]
-            )
-        submit_models_button.click(
-            fn=lambda *args: (get_selected_models(*args), gr.update(open=False, visible=True), gr.update(open=True, visible=True)),
-            inputs=model_checkboxes,
-            outputs=[selected_models_output, select_model_acc, qatch_acc]
-        )
-        reset_data = gr.Button("Open upload data section")
-        reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc])
-    ###############################
-    #  PARTE DI ESECUZIONE QATCH  #
-    ###############################
-    with qatch_acc:
-        selected_models_display = gr.JSON(label="Modelli selezionati")
-        submit_models_button.click(
-            fn=lambda: gr.update(value=input_data),
-            outputs=[selected_models_display]
-        )
-        proceed_to_metrics_button = gr.Button("Proceed to Metrics")
-        proceed_to_metrics_button.click(
-            fn=lambda: (gr.update(open=False, visible=True), gr.update(open=True, visible=True)),
-            outputs=[qatch_acc, metrics_acc]
-        )
-        reset_data = gr.Button("Open upload data section")
-        reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc])
-    #######################################
-    #  PARTE DI VISUALIZZAZIONE METRICHE  #
-    #######################################
-    with metrics_acc:
-        confirmation_text = gr.Markdown("## Metrics successfully loaded")
-        data_path = 'metrics_random2.csv'
-        def load_data_csv_es():
-            return pd.read_csv(data_path)
-        def calculate_average_metrics(df, selected_metrics):
-            df['avg_metric'] = df[selected_metrics].mean(axis=1)
-            return df
-        def plot_metric(df, selected_metrics, group_by, selected_models):
-            df = df[df['model'].isin(selected_models)]
-            df = calculate_average_metrics(df, selected_metrics)
-            avg_metrics = df.groupby(group_by)['avg_metric'].mean().reset_index()
-            fig = px.bar(
-                avg_metrics, x=group_by[0], y='avg_metric', color=group_by[-1], barmode='group',
-                title=f'Media metrica per {group_by[0]}',
-                labels={group_by[0]: group_by[0].capitalize(), 'avg_metric': 'Media Metrica'},
-                template='plotly_dark'
-            )
-            return fig
-        def plot_radar(df, selected_models):
-            radar_data = []
-            for model in selected_models:
-                model_df = df[df['model'] == model]
-                valid_efficiency = model_df['valid_efficiency_score'].mean()
-                avg_time = model_df['time'].mean()
-                avg_tuple_order = model_df['tuple_order'].dropna().mean()
-                radar_data.append({
-                    'model': model,
-                    'valid_efficiency_score': valid_efficiency,
-                    'time': avg_time,
-                    'tuple_order': avg_tuple_order
-                })
-            radar_df = pd.DataFrame(radar_data)
-            categories = ['valid_efficiency_score', 'time', 'tuple_order']
-            # Calcola il range dinamico per il grafico
-            min_val = radar_df[categories].min().min()
-            max_val = radar_df[categories].max().max()
-            radar_df[categories] = (radar_df[categories] - min_val) / (max_val - min_val)
-            fig = go.Figure()
-            for _, row in radar_df.iterrows():
-                fig.add_trace(go.Scatterpolar(
-                    r=[row[cat] for cat in categories],
-                    theta=categories,
-                    fill='toself',
-                    name=row['model']
-                ))
-            fig.update_layout(
-                polar=dict(radialaxis=dict(visible=True, range=[min_val, max_val])),
-                title='Radar Plot delle Metriche per Modello',
-                template='plotly_dark',
-                width=700, height=700
-            )
-            return fig
-        def plot_query_rate(df, selected_models, show_labels):
-            df = df[df['model'].isin(selected_models)]
-            fig = go.Figure()
-            for model in selected_models:
-                model_df = df[df['model'] == model].copy()
-                model_df['cumulative_time'] = model_df['time'].cumsum()
-                model_df['query_rate'] = 1 / model_df['time']
-                fig.add_trace(go.Scatter(
-                    x=model_df['cumulative_time'],
-                    y=model_df['query_rate'],
-                    mode='lines+markers',
-                    name=model,
-                    line=dict(width=2)
-                ))
-                if show_labels:
-                    prev_category = None
-                    prev_time = -float('inf')
-                    y_positions = [1.1, 1.3]
-                    y_idx = 0
-                    for i, row in model_df.iterrows():
-                        current_category = row['test_category']
-                        if current_category != prev_category and row['cumulative_time'] - prev_time > 5:
-                            fig.add_vline(x=row['cumulative_time'], line_width=1, line_dash="dash", line_color="gray")
-                            fig.add_annotation(
-                                x=row['cumulative_time'],
-                                y=max(model_df['query_rate']) * y_positions[y_idx % 2],
-                                text=current_category,
-                                showarrow=False,
-                                font=dict(size=10, color="white"),
-                                textangle=45,
-                                yshift=10,
-                                bgcolor="rgba(0,0,0,0.6)"
-                            )
-                            prev_category = current_category
-                            prev_time = row['cumulative_time']
-                            y_idx += 1
-            fig.update_layout(
-                title="Rate di Generazione delle Query per Modello",
-                xaxis_title="Tempo Cumulativo (s)",
-                yaxis_title="Query al Secondo",
-                template='plotly_dark',
-                legend_title="Modelli"
-            )
-            return fig
-        def update_plot(selected_metrics, group_by, selected_models):
-            df = load_data_csv_es()
-            return plot_metric(df, selected_metrics, group_by, selected_models)
-        def update_radar(selected_models):
-            df = load_data_csv_es()
-            return plot_radar(df, selected_models)
-        def update_query_rate(selected_models, show_labels):
-            df = load_data_csv_es()
-            return plot_query_rate(df, selected_models, show_labels)
-        def plot_query_time_evolution(df, selected_models):
-            # Filtriamo i dati per i modelli selezionati
-            df = df[df['model'].isin(selected_models)]
-            # Ordinare per modello e tempo per tracciare l'evoluzione
-            df_sorted = df.sort_values(by=['model', 'time'])
-            fig = go.Figure()
-            # Aggiungiamo una traccia per ogni modello
-            for model in selected_models:
-                model_df = df_sorted[df_sorted['model'] == model]
-                fig.add_trace(go.Scatter(
-                    x=model_df.index, y=model_df['time'], mode='lines+markers', name=model,
-                    line=dict(shape='linear'),
-                    text=model_df['model']
-                ))
-            fig.update_layout(
-                title="Evoluzione del Tempo di Generazione per Modello",
-                xaxis_title="Indice della Query",
-                yaxis_title="Tempo (s)",
-                template='plotly_dark'
-            )
-            return fig
-        metrics = ["cell_precision", "cell_recall", "execution_accuracy", "tuple_cardinality", "tuple_constraint"]
-        group_options = {
-            "SQL Category": ["test_category", "model"],
-            "Tabella": ["tbl_name", "model"],
-            "Modello": ["model"]
-        }
-        df_initial = load_data_csv_es()
-        models = df_initial['model'].unique().tolist()
-        #with gr.Blocks(theme=gr.themes.Default(primary_hue='blue')) as demo:
-        gr.Markdown("""## Analisi delle prestazioni dei modelli
-        Seleziona una o più metriche per calcolare la media e visualizzare gli istogrammi e radar plots.
-        """)
-        # Sezione di selezione delle opzioni
-        with gr.Row():
-            metric_multiselect = gr.CheckboxGroup(choices=metrics, label="Seleziona le metriche")
-            model_multiselect = gr.CheckboxGroup(choices=models, label="Seleziona i modelli", value=models)
-            group_radio = gr.Radio(choices=list(group_options.keys()), label="Seleziona il raggruppamento", value="SQL Category")
-            #show_labels_checkbox = gr.Checkbox(label="Mostra etichette test category", value=True)
-        with gr.Row():
-            output_plot = gr.Plot()
-        # Dividi la pagina in due colonne
-        with gr.Row():
-            with gr.Column(scale=1):  # Imposta la colonna a occupare metà della larghezza
-                radar_plot = gr.Plot(value=update_radar(models))
-            with gr.Column(scale=2):  # Imposta la seconda colonna a occupare l'altra metà
-                show_labels_checkbox = gr.Checkbox(label="Mostra etichette test category", value=True)
-                query_rate_plot = gr.Plot(value=update_query_rate(models, True))
-        # Funzioni di callback per il cambiamento dei grafici
-        def on_change(selected_metrics, selected_group, selected_models):
-            return update_plot(selected_metrics, group_options[selected_group], selected_models)
-        def on_radar_change(selected_models):
-            return update_radar(selected_models)
-        show_labels_checkbox.change(update_query_rate, inputs=[model_multiselect, show_labels_checkbox], outputs=query_rate_plot)
-        metric_multiselect.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
-        group_radio.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
-        model_multiselect.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
-        model_multiselect.change(on_radar_change, inputs=model_multiselect, outputs=radar_plot)
-        model_multiselect.change(update_query_rate, inputs=[model_multiselect, show_labels_checkbox], outputs=query_rate_plot)
-        reset_data = gr.Button("Open upload data section")
-        reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc])
 interface.launch()

+import gradio as gr
+import pandas as pd
+import os
+import sys
+from qatch.connectors.sqlite_connector import SqliteConnector
+from qatch.generate_dataset.orchestrator_generator import OrchestratorGenerator
+from qatch.evaluate_dataset.orchestrator_evaluator import OrchestratorEvaluator
+#from predictor.orchestrator_predictor import OrchestratorPredictor
+import utils_get_db_tables_info
+import utilities as us
+import time
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.colors as pc
+# Read the stylesheet text into `css` when the module is imported.
+with open('style.css', 'r') as file:
+    css = file.read()
+# Default DataFrame
+df_default = pd.DataFrame({
+    'Name': ['Alice', 'Bob', 'Charlie'],
+    'Age': [25, 30, 35],
+    'City': ['New York', 'Los Angeles', 'Chicago']
+})
+# Path of the models CSV (the previous version of this file passed it to
+# us.read_models_csv).
+models_path = "models.csv"
+# Global variable that keeps track of the current data
+df_current = df_default.copy()
+# Module-level state mutated in place by load_data and open_accordion.
+input_data = {
+    'input_method': "",
+    'data_path': "",
+    'db_name': "",
+    'data': {
+        'data_frames': {},    # dictionary of dataframes
+        'db': None             # SQLITE3 database object
+    },
+    'models': []
+}
+def load_data(file, path, use_default):
+    """Load data from an uploaded file, or use the default DataFrame.
+
+    Fills the module-level ``input_data`` dict (input method, database path
+    and name, data frames, database connector) and rebinds the global
+    ``df_current``.
+
+    Args:
+        file: The uploaded file, or None. It is passed to
+            ``os.path.basename`` and ``us.load_data``, so presumably a path
+            string -- TODO confirm against the Gradio File component.
+        path: A local file path. It is only counted in the "one input at a
+            time" check; the branch that loaded from it is currently
+            disabled (see the string literal near the end of the function).
+        use_default: When truthy, the current DataFrame is registered as
+            table 'MyTable' and the other inputs are ignored.
+
+    Returns:
+        The ``data_frames`` dict stored in ``input_data`` on success, or an
+        error-message string. Exceptions raised in the uploaded-file branch
+        are caught and returned as a message; the default branch has no
+        such handler.
+    """
+    global df_current
+    if use_default:
+        input_data["input_method"] = 'default'
+        input_data["data_path"] = os.path.join(".", "data", "data_interface", "mytable.sqlite")
+        input_data["db_name"] = os.path.splitext(os.path.basename(input_data["data_path"]))[0]
+        # The table holds the current value of the global df_current, which
+        # is reset to the default only after the connector has been built.
+        input_data["data"]['data_frames'] = {'MyTable': df_current}
+        if( input_data["data"]['data_frames']):
+            table2primary_key = {}
+            for table_name, df in input_data["data"]['data_frames'].items():
+                # Assign primary keys for each table
+                # NOTE(review): df_default has no 'id' column -- confirm how
+                # SqliteConnector treats a primary key that is not a column.
+                table2primary_key[table_name] = 'id'
+            input_data["data"]["db"] = SqliteConnector(
+                relative_db_path=input_data["data_path"],
+                db_name=input_data["db_name"],
+                tables= input_data["data"]['data_frames'],
+                table2primary_key=table2primary_key
+            )
+        df_current = df_default.copy()  # Restore the default data
+        return input_data["data"]['data_frames']
+    # use_default is always falsy here because of the early return above, so
+    # this effectively counts only `file` and `path`.
+    selected_inputs = sum([file is not None, bool(path), use_default])
+    if selected_inputs > 1:
+        return 'Errore: Selezionare solo un metodo di input alla volta.'
+    if file is not None:
+        try:
+            input_data["input_method"] = 'uploaded_file'
+            input_data["db_name"] = os.path.splitext(os.path.basename(file))[0]
+            input_data["data_path"] = os.path.join(".", "data", "data_interface",f"{input_data['db_name']}.sqlite")
+            # Replaces the whole 'data' entry; the result is indexed with
+            # 'data_frames' below, so presumably us.load_data returns a dict
+            # with that key -- verify in utilities.py.
+            input_data["data"] = us.load_data(file, input_data["db_name"])
+            df_current = input_data["data"]['data_frames'].get('MyTable', df_default)  # Load the DataFrame
+            if( input_data["data"]['data_frames']):
+                table2primary_key = {}
+                for table_name, df in input_data["data"]['data_frames'].items():
+                    # Assign primary keys for each table
+                    table2primary_key[table_name] = 'id'
+                input_data["data"]["db"] = SqliteConnector(
+                    relative_db_path=input_data["data_path"],
+                    db_name=input_data["db_name"],
+                    tables= input_data["data"]['data_frames'],
+                    table2primary_key=table2primary_key
+                )
+            return input_data["data"]['data_frames']
+        except Exception as e:
+            return f'Errore nel caricamento del file: {e}'
+    # NOTE: the path-based loading branch is disabled by wrapping it in the
+    # string literal below; it is evaluated as a no-op expression statement.
+    """
+    if path:
+        if not os.path.exists(path):
+            return 'Errore: Il percorso specificato non esiste.'
+        try:
+            input_data["input_method"] = 'uploaded_file'
+            input_data["data_path"] = path
+            input_data["db_name"] = os.path.splitext(os.path.basename(path))[0]
+            input_data["data"] = us.load_data(input_data["data_path"], input_data["db_name"])
+            df_current = input_data["data"]['data_frames'].get('MyTable', df_default)  # Carica il DataFrame
+            return input_data["data"]['data_frames']
+        except Exception as e:
+            return f'Errore nel caricamento del file dal percorso: {e}'
+    """
+    # Neither the default nor a file was selected: return whatever data
+    # frames are currently stored.
+    return input_data["data"]['data_frames']
+def preview_default(use_default):
+    """Return the default DataFrame if the checkbox is selected, else the current one."""
+    if use_default:
+        return df_default  # Show the default DataFrame
+    return df_current  # Show the current DataFrame, which may have been modified
+def update_df(new_df):
+    """Replace the current DataFrame with ``new_df`` and return it."""
+    global df_current  # Rebind the module-level variable, not a local
+    df_current = new_df
+    return df_current
+def open_accordion(target):
+    """Return the Gradio updates that open one section and close the others.
+
+    Args:
+        target: ``"reset"`` to return to the upload section and clear the
+            loaded state, or ``"model_selection"`` to open the model
+            selection section.
+
+    Returns:
+        For ``"reset"``: a tuple of seven ``gr.update`` objects.
+        For ``"model_selection"``: a tuple of five ``gr.update`` objects.
+        Any other target falls through and returns None.
+        NOTE(review): the two branches have different lengths, so each event
+        wiring must list a matching number of outputs. Presumably the five
+        accordions, followed for "reset" by the default checkbox and the
+        file input -- confirm against the ``.click`` calls.
+    """
+    # Without this declaration the assignment in the "reset" branch would
+    # only bind a local name and the module-level DataFrame would never be
+    # restored.
+    global df_current
+    if target == "reset":
+        # Restore the default DataFrame and clear everything load_data and
+        # the model selection stored in input_data.
+        df_current = df_default.copy()
+        input_data['input_method'] = ""
+        input_data['data_path'] = ""
+        input_data['db_name'] = ""
+        input_data['data']['data_frames'] = {}
+        input_data['data']['db'] = None
+        input_data['models'] = []
+        return (
+            gr.update(open=True),
+            gr.update(open=False, visible=False),
+            gr.update(open=False, visible=False),
+            gr.update(open=False, visible=False),
+            gr.update(open=False, visible=False),
+            gr.update(value=False),
+            gr.update(value=None),
+        )
+    elif target == "model_selection":
+        return (
+            gr.update(open=False),
+            gr.update(open=False),
+            gr.update(open=True, visible=True),
+            gr.update(open=False),
+            gr.update(open=False),
+        )
+# Gradio interface
+with gr.Blocks(theme='d8ahazard/rd_blue', css_paths='style.css') as interface:
+    gr.Markdown("# QATCH")
+    data_state = gr.State(None)  # Holds the loaded data
+    # All five step accordions are created here; each one is filled in by its
+    # own `with <accordion>:` section further down.
+    upload_acc = gr.Accordion("Upload your data section", open=True, visible=True)
+    select_table_acc = gr.Accordion("Select tables", open=False, visible=False)
+    select_model_acc = gr.Accordion("Select models", open=False, visible=False)
+    qatch_acc = gr.Accordion("QATCH execution", open=False, visible=False)
+    metrics_acc = gr.Accordion("Metrics", open=False, visible=False)
+    #metrics_acc = gr.Accordion("Metrics", open=False, visible=False, render=False)
+    #################################
+    #  DATABASE UPLOAD SECTION      #
+    #################################
+    with upload_acc:
+        gr.Markdown("## Caricamento dei Dati")
+        file_input = gr.File(label="Trascina e rilascia un file", file_types=[".csv", ".xlsx", ".sqlite"])
+        with gr.Row():
+            default_checkbox = gr.Checkbox(label="Usa DataFrame di default")
+        preview_output = gr.DataFrame(interactive=True, visible=True, value=df_default)
+        submit_button = gr.Button("Carica Dati", interactive=False)  # Disabled by default
+        output = gr.JSON(visible=False)  # Dictionary output
+        # Enable the button only when there is something to load
+        def enable_submit(file, use_default):
+            """Return an update that makes the button interactive iff ``file`` or ``use_default`` is truthy."""
+            has_source = bool(file or use_default)
+            return gr.update(interactive=has_source)
+        # Untick the default checkbox once a file has been uploaded
+        def deselect_default(file):
+            """Return an update clearing the checkbox when ``file`` is truthy, otherwise an empty update."""
+            return gr.update(value=False) if file else gr.update()
+        # Enable the button when the input fields hold a value
+        file_input.change(fn=enable_submit, inputs=[file_input, default_checkbox], outputs=[submit_button])
+        default_checkbox.change(fn=enable_submit, inputs=[file_input, default_checkbox], outputs=[submit_button])
+        # Show the default DataFrame preview when the checkbox is ticked
+        default_checkbox.change(fn=preview_default, inputs=[default_checkbox], outputs=[preview_output])
+        # Edits made in the preview grid are written back to the current DataFrame via update_df
+        preview_output.change(fn=update_df, inputs=[preview_output], outputs=[preview_output])
+        # Untick the checkbox when a file is uploaded
+        file_input.change(fn=deselect_default, inputs=[file_input], outputs=[default_checkbox])
+        def handle_output(file, use_default):
+            """Handle a press of the 'Carica Dati' button.
+
+            Calls ``load_data(file, None, use_default)`` and returns a 7-tuple
+            whose shape depends on the result:
+
+            * a dict with exactly one entry: the model-selection accordion is
+              opened directly and the table-selection accordion is hidden;
+            * a dict with any other number of entries: the table-selection
+              accordion is opened;
+            * anything else (treated as a failed load): both data slots are
+              ``None`` and the submit button stays interactive.
+            """
+            loaded = load_data(file, None, use_default)
+            hide_json = gr.update(visible=False)  # the JSON output is hidden in every case
+            if not isinstance(loaded, dict):
+                # Load did not produce a dict of tables
+                return (
+                    hide_json,
+                    None,
+                    gr.update(open=False, visible=True),
+                    None,
+                    gr.update(interactive=True),
+                    gr.update(visible=False),
+                    gr.update(visible=True, open=True)
+                )
+            if len(loaded) == 1:
+                # Single table: skip table selection and go straight to the models
+                table_acc_update = gr.update(visible=False)
+                model_acc_update = gr.update(visible=True, open=True)
+                upload_acc_update = gr.update(visible=True, open=False)
+            else:
+                table_acc_update = gr.update(open=True, visible=True)
+                model_acc_update = gr.update(visible=False)
+                upload_acc_update = gr.update(visible=True, open=True)
+            return (
+                hide_json,
+                loaded,  # stored as the data state
+                table_acc_update,
+                loaded,  # keeps the data state
+                gr.update(interactive=False),  # disable the submit button
+                model_acc_update,
+                upload_acc_update
+            )
+        submit_button.click(
+            fn=handle_output,
+            inputs=[file_input, default_checkbox],
+            # NOTE(review): `output` is listed twice, so the first two values returned by
+            # handle_output both target the same JSON component - confirm this is intended.
+            outputs=[output, output, select_table_acc, data_state, submit_button, select_model_acc, upload_acc]
+        )
+    ######################################
+    #  TABLE SELECTION SECTION           #
+    ######################################
+    with select_table_acc:
+        table_selector = gr.CheckboxGroup(choices=[], label="Seleziona le tabelle da visualizzare", value=[])
+        # Fixed pool of five hidden preview grids
+        table_outputs = [gr.DataFrame(label=f"Tabella {i+1}", interactive=True, visible=False) for i in range(5)]
+        selected_table_names = gr.Textbox(label="Tabelle selezionate", visible=False, interactive=False)
+        # Model-selection button (initially disabled)
+        open_model_selection = gr.Button("Choose your models", interactive=False)
+        def update_table_list(data):
+            """Refresh the table checkbox choices from the loaded data.
+
+            The choices become the keys of ``data`` when it is a non-empty dict,
+            otherwise an empty list; the current selection is always cleared.
+            """
+            table_names = list(data.keys()) if isinstance(data, dict) and data else []
+            return gr.update(choices=table_names, value=[])
+        def show_selected_tables(data, selected_tables):
+            """Show the selected tables in the preview grids and toggle the button.
+
+            Args:
+                data: dict mapping table name to DataFrame (anything else hides every grid).
+                selected_tables: names ticked in the checkbox group; ``None`` is treated as empty.
+
+            Returns:
+                One update per preview grid in ``table_outputs`` followed by one
+                update for the button, which is interactive iff a valid table is selected.
+            """
+            # The number of returned updates must match the number of wired grids, so it is
+            # derived from table_outputs instead of repeating the literal 5.
+            slot_count = len(table_outputs)
+            selected_tables = selected_tables or []
+            if isinstance(data, dict) and data:
+                # Keep only the selections that really exist in the loaded data
+                selected_tables = [t for t in selected_tables if t in data]
+                # Selections beyond the available grids cannot be displayed; previously they
+                # produced more updates than outputs.
+                shown = selected_tables[:slot_count]
+                updates = [gr.update(value=data[name], label=f"Tabella: {name}", visible=True) for name in shown]
+                # Hide the grids that are not used
+                updates += [gr.update(visible=False) for _ in range(slot_count - len(shown))]
+            else:
+                updates = [gr.update(value=pd.DataFrame(), visible=False) for _ in range(slot_count)]
+            # Enable the button only when at least one table is selected
+            updates.append(gr.update(interactive=bool(selected_tables)))
+            return updates
+        def show_selected_table_names(selected_tables):
+            """Return an update holding the selected table names joined by ", " (empty string if none).
+
+            The target component is kept hidden in both cases.
+            """
+            joined_names = ", ".join(selected_tables) if selected_tables else ""
+            return gr.update(value=joined_names, visible=False)
+        # Refresh the checkbox list automatically whenever `data_state` changes
+        data_state.change(fn=update_table_list, inputs=[data_state], outputs=[table_selector])
+        # Update the visible tables and the button state according to the user's selection
+        table_selector.change(fn=show_selected_tables, inputs=[data_state, table_selector], outputs=table_outputs + [open_model_selection])
+        # Store the list of selected tables when "Choose your models" is pressed
+        open_model_selection.click(fn=show_selected_table_names, inputs=[table_selector], outputs=[selected_table_names])
+        open_model_selection.click(open_accordion, inputs=gr.State("model_selection"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc])
+    ####################################
+    #  MODEL SELECTION SECTION         #
+    ####################################
+    with select_model_acc:
+        gr.Markdown("**Model Selection**")
+        # Each entry returned by `us.read_models_csv` is read for its "code" and "image_path" keys
+        model_list_dict = us.read_models_csv(models_path)
+        model_list = [model["code"] for model in model_list_dict]
+        model_images = [model["image_path"] for model in model_list_dict]
+        model_checkboxes = []
+        rows = []
+        # Build the checkboxes with their images dynamically (3 per row)
+        for i in range(0, len(model_list), 3):
+            with gr.Row():
+                cols = []
+                for j in range(3):
+                    # The last row may hold fewer than three models
+                    if i + j < len(model_list):
+                        model = model_list[i + j]
+                        image_path = model_images[i + j]
+                        with gr.Column():
+                            gr.Image(image_path, show_label=False)
+                            checkbox = gr.Checkbox(label=model, value=False)
+                            model_checkboxes.append(checkbox)
+                            cols.append(checkbox)
+                rows.append(cols)
+        selected_models_output = gr.JSON(visible=False)
+        # Collect the models whose checkbox is ticked
+        def get_selected_models(*model_selections):
+            """Record the ticked models and return the matching UI updates.
+
+            ``model_selections`` holds one value per checkbox, paired positionally
+            with ``model_list``. The chosen names are stored in
+            ``input_data['models']``.
+
+            Returns:
+                ``(chosen names, accordion update, button update)``; the button is
+                interactive iff at least one model is chosen.
+            """
+            chosen = []
+            for name, ticked in zip(model_list, model_selections):
+                if ticked:
+                    chosen.append(name)
+            input_data['models'] = chosen
+            accordion_update = gr.update(open=True, visible=True)
+            button_update = gr.update(interactive=bool(chosen))
+            return chosen, accordion_update, button_update
+        # Submit button (initially disabled)
+        submit_models_button = gr.Button("Submit Models", interactive=False)
+        # Re-evaluate the selection whenever any checkbox changes
+        for checkbox in model_checkboxes:
+            checkbox.change(
+                fn=get_selected_models,
+                inputs=model_checkboxes,
+                outputs=[selected_models_output, select_model_acc, submit_models_button]
+            )
+        submit_models_button.click(
+            # get_selected_models returns (models, accordion update, button update); only the
+            # model list belongs in the JSON output, so take element 0 instead of passing the
+            # whole tuple as the JSON value.
+            fn=lambda *args: (get_selected_models(*args)[0], gr.update(open=False, visible=True), gr.update(open=True, visible=True)),
+            inputs=model_checkboxes,
+            outputs=[selected_models_output, select_model_acc, qatch_acc]
+        )
+        reset_data = gr.Button("Back to upload data section")
+        reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc, default_checkbox, file_input])
+    ###############################
+    #  QATCH EXECUTION SECTION    #
+    ###############################
+    with qatch_acc:
+        def change_text(text):
+            """Return ``text`` unchanged (pass-through used to copy one component's value into another)."""
+            return text
+        def qatch_flow():
+            """Generate the test set, run the placeholder prediction per model, evaluate, and stream UI updates.
+
+            This is a generator. Every ``yield`` produces, in order: the progress
+            markdown, the question textbox, the prediction textbox, the metrics
+            frame, and then one predictions frame per entry of ``model_list``.
+            """
+            orchestrator_generator = OrchestratorGenerator()
+            #TODO add to target_df column target_df["columns_used"], tables selection
+            #print(input_data['data']['db'])
+            target_df = orchestrator_generator.generate_dataset(connector=input_data['data']['db'])
+            # Not consumed yet: kept for the real predictor call sketched further down.
+            schema_text = utils_get_db_tables_info.utils_extract_db_schema_as_string(
+                db_id = input_data["db_name"],
+                base_path = input_data["data_path"],
+                normalize=False,
+                sql=None
+            )
+            # TODO QUERY PREDICTION
+            # One frame per known model (not only the selected ones) because the wired outputs
+            # are built from model_list.
+            predictions_dict = {model: pd.DataFrame(columns=['id', 'question', 'predicted_sql', 'time', 'query', 'db_path']) for model in model_list}
+            metrics_conc = pd.DataFrame()
+            total_rows = len(target_df)
+            for model in input_data["models"]:
+                # Progress uses the row position rather than the index label, which is not
+                # guaranteed to be 0..n-1. The loop body only runs when total_rows > 0.
+                for position, (index, row) in enumerate(target_df.iterrows(), start=1):
+                    # "## " (with a space) is required for Markdown to render a heading.
+                    load_value = f"## Loading... {round(position / total_rows * 100, 2)}%"
+                    # NOTE(review): the value displayed and stored as 'question' is read from the
+                    # 'query' column - confirm this is intended.
+                    question = row['query']
+                    #yield gr.Textbox(question), gr.Textbox(), *[predictions_dict[model] for model in input_data["models"]], None
+                    yield gr.Markdown(value=load_value), gr.Textbox(question), gr.Textbox(), metrics_conc, *[predictions_dict[model] for model in model_list]
+                    start_time = time.time()
+                    # Simulated prediction
+                    time.sleep(0.03)
+                    prediction = "Prediction_placeholder"
+                    # Run the real prediction here
+                    # prediction = predictor.run(model, schema_text, question)
+                    end_time = time.time()
+                    # Build the new row as a dataframe
+                    new_row = pd.DataFrame([{
+                        'id': index,
+                        'question': question,
+                        'predicted_sql': prediction,
+                        'time': end_time - start_time,
+                        'query': row["query"],
+                        'db_path': input_data["data_path"]
+                    }]).dropna(how="all")  # Drops only rows that are entirely empty
+                    # Carry over every target column that the new row does not already define
+                    for col in target_df.columns:
+                        if col not in new_row.columns:
+                            new_row[col] = row[col]
+                    # Append to the frame of the current model as results come in
+                    if not new_row.empty:
+                        predictions_dict[model] = pd.concat([predictions_dict[model], new_row], ignore_index=True)
+                    #yield gr.Textbox(), gr.Textbox(prediction), *[predictions_dict[model] for model in input_data["models"]], None
+                    yield gr.Markdown(value=load_value), gr.Textbox(), gr.Textbox(prediction), metrics_conc, *[predictions_dict[model] for model in model_list]
+            #END
+            evaluator = OrchestratorEvaluator()
+            for model in input_data["models"]:
+                metrics_df_model = evaluator.evaluate_df(
+                    df=predictions_dict[model],
+                    target_col_name="query",              #'<target_column_name>',
+                    prediction_col_name="predicted_sql",  #'<prediction_column_name>',
+                    db_path_name= "db_path",              #'<db_path_column_name>'
+                )
+                metrics_df_model['model'] = model
+                # Normalise the score column on each model's frame before concatenating. Doing it
+                # on the concatenated frame filled the column for the first model only (later rows
+                # stayed NaN) and raised KeyError when 'VES' was absent.
+                if 'valid_efficiency_score' not in metrics_df_model.columns and 'VES' in metrics_df_model.columns:
+                    metrics_df_model['valid_efficiency_score'] = metrics_df_model['VES']
+                metrics_conc = pd.concat([metrics_conc, metrics_df_model], ignore_index=True)
+            yield gr.Markdown(), gr.Textbox(), gr.Textbox(), metrics_conc, *[predictions_dict[model] for model in model_list]
+        #Loading Bar
+        with gr.Row():
+            #progress = gr.Progress()
+            variable = gr.Markdown()
+        #NL -> MODEL -> Generated Query
+        with gr.Row():
+            with gr.Column():
+                question_display = gr.Textbox()
+            with gr.Column():
+                gr.Image()
+            with gr.Column():
+                prediction_display = gr.Textbox()
+        # One results grid per known model, keyed by model code
+        dataframe_per_model = {}
+        with gr.Tabs() as model_tabs:
+            #for model in input_data["models"]:
+            for model in model_list:
+                #TODO fix model tabs
+                with gr.TabItem(model):
+                    gr.Markdown(f"**Results for {model}**")
+                    dataframe_per_model[model] = gr.DataFrame()
+        #question_display.change(fn=change_text, inputs=[gr.State(question)], outputs=[question_display])
+        selected_models_display = gr.JSON(label="Modelli selezionati")
+        metrics_df = gr.DataFrame(visible=False)
+        metrics_df_out= gr.DataFrame(visible=False)
+        # Second click handler on the same button: runs the QATCH flow and streams its yields
+        submit_models_button.click(
+            fn=qatch_flow,
+            inputs=[],
+            outputs=[variable, question_display, prediction_display, metrics_df] + list(dataframe_per_model.values())
+        )
+        # Third click handler: shows the whole input_data dict in the JSON component
+        submit_models_button.click(
+            fn=lambda: gr.update(value=input_data),
+            outputs=[selected_models_display]
+        )
+        # Works for METRICS: copies metrics_df into metrics_df_out whenever it changes
+        metrics_df.change(fn=change_text, inputs=[metrics_df], outputs=[metrics_df_out])
+        # def change_tab(selected_models_output, model_tabs):
+        #     for model in model_list:
+        #         if model in selected_models_output:
+        #             pass#model_tabs[model].visible = True
+        #         else:
+        #             pass#model_tabs[model].visible = False
+        #     return model_tabs
+        # selected_models_output.change(fn=change_tab, inputs=[selected_models_output, model_tabs], outputs=[])
+        proceed_to_metrics_button = gr.Button("Proceed to Metrics")
+        proceed_to_metrics_button.click(
+            fn=lambda: (gr.update(open=False, visible=True), gr.update(open=True, visible=True)),
+            outputs=[qatch_acc, metrics_acc]
+        )
+        # Rebinds the name `reset_data` to a new button that lives in this accordion
+        reset_data = gr.Button("Back to upload data section")
+        reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc, default_checkbox, file_input])
+    #######################################
+    #  METRICS VISUALIZATION SECTION      #
+    #######################################
+    with metrics_acc:
+        #confirmation_text = gr.Markdown("## Metrics successfully loaded")
+        data_path = 'test_results.csv'
+        @gr.render(inputs=metrics_df_out)
+        def function_metrics(metrics_df_out):
+            """Build the metrics dashboard (charts, rankings and their event wiring).
+
+            Registered through ``gr.render`` with ``metrics_df_out`` as its input.
+            """
+            def load_data_csv_es():
+                """Read the results from the CSV at ``data_path``; the ``metrics_df_out`` argument is not used here."""
+                return pd.read_csv(data_path)
+                #return metrics_df_out
+            def calculate_average_metrics(df, selected_metrics):
+                """Return a copy of ``df`` with an ``avg_metric`` column.
+
+                ``avg_metric`` is the row-wise mean of the ``selected_metrics``
+                columns. The input frame is left untouched: callers pass filtered
+                slices, and writing a column into a slice is a chained assignment.
+                """
+                return df.assign(avg_metric=df[selected_metrics].mean(axis=1))
+            def generate_model_colors():
+                """Map every model found in the results CSV to a colour of the Plotly qualitative palette."""
+                palette = pc.qualitative.Plotly  # ['#636EFA', '#EF553B', '#00CC96', ...]
+                model_names = load_data_csv_es()['model'].unique()
+                # Wrap around the palette when there are more models than colours
+                return {name: palette[pos % len(palette)] for pos, name in enumerate(model_names)}
+            MODEL_COLORS = generate_model_colors()
+            # BAR CHART FOR AVERAGE METRICS WITH UPDATE FUNCTION
+            def plot_metric(df, selected_metrics, group_by, selected_models):
+                """Draw the grouped bar chart of the mean ``avg_metric`` per ``group_by`` key.
+
+                Rows are limited to ``selected_models``; ``group_by`` falls back to
+                ``["tbl_name", "model"]`` when it is not one of the two supported groupings.
+                """
+                supported_groupings = (["tbl_name", "model"], ["model"])
+                if group_by not in supported_groupings:
+                    group_by = ["tbl_name", "model"]  # Default
+                filtered = df[df['model'].isin(selected_models)]
+                filtered = calculate_average_metrics(filtered, selected_metrics)
+                avg_metrics = filtered.groupby(group_by)['avg_metric'].mean().reset_index()
+                x_column = group_by[0]
+                return px.bar(
+                    avg_metrics,
+                    x=x_column,
+                    y='avg_metric',
+                    color='model',
+                    color_discrete_map=MODEL_COLORS,
+                    barmode='group',
+                    title=f'Average metric per {x_column} 📊',
+                    labels={x_column: x_column.capitalize(), 'avg_metric': 'Average Metric'},
+                    template='plotly_dark'
+                )
+            def update_plot(selected_metrics, group_by, selected_models):
+                """Reload the results and redraw the bar chart."""
+                return plot_metric(load_data_csv_es(), selected_metrics, group_by, selected_models)
+            # RADAR CHART FOR AVERAGE METRICS PER MODEL WITH UPDATE FUNCTION
+            def plot_radar(df, selected_models):
+                """Draw one radar trace per selected model: mean ``avg_metric`` per test category.
+
+                Returns an empty figure (after printing a message) when no data is left
+                after filtering.
+                """
+                radar_metrics = ["cell_precision", "cell_recall", "execution_accuracy", "tuple_cardinality", "tuple_constraint"]
+                filtered = calculate_average_metrics(df[df['model'].isin(selected_models)], radar_metrics)
+                # Average per model and test category
+                avg_metrics = filtered.groupby(['model', 'test_category'])['avg_metric'].mean().reset_index()
+                if avg_metrics.empty:
+                    print("Error: No data available to compute averages.")
+                    return go.Figure()
+                fig = go.Figure()
+                categories = avg_metrics['test_category'].unique()
+                for model in selected_models:
+                    per_model = avg_metrics[avg_metrics['model'] == model]
+                    present = per_model['test_category'].values
+                    values = []
+                    for cat in categories:
+                        if cat in present:
+                            values.append(per_model.loc[per_model['test_category'] == cat, 'avg_metric'].values[0])
+                        else:
+                            values.append(0)  # category missing for this model
+                    trace = go.Scatterpolar(
+                        r=values,
+                        theta=categories,
+                        fill='toself',
+                        name=model,
+                        line=dict(color=MODEL_COLORS.get(model, "gray"))
+                    )
+                    fig.add_trace(trace)
+                # The radial axis spans at least 0..0.5
+                radial_max = max(avg_metrics['avg_metric'].max(), 0.5)
+                fig.update_layout(
+                    polar=dict(radialaxis=dict(visible=True, range=[0, radial_max])),
+                    title='❇️ Radar Plot of Metrics per Model (Average per Category) ❇️ ',
+                    template='plotly_dark',
+                    width=700, height=700
+                )
+                return fig
+            def update_radar(selected_models):
+                """Reload the results and redraw the radar chart."""
+                return plot_radar(load_data_csv_es(), selected_models)
+            # LINE CHART FOR CUMULATIVE TIME WITH UPDATE FUNCTION
+            def plot_cumulative_flow(df, selected_models):
+                """Draw, per selected model, the number of completed queries against cumulative time.
+
+                Rows are taken in frame order; the x value is the running sum of the
+                ``time`` column and the y value is the running row count.
+                """
+                df = df[df['model'].isin(selected_models)]
+                fig = go.Figure()
+                for model in selected_models:
+                    model_df = df[df['model'] == model].copy()
+                    # Calculate cumulative time
+                    model_df['cumulative_time'] = model_df['time'].cumsum()
+                    # Calculate cumulative number of queries over time
+                    model_df['cumulative_queries'] = range(1, len(model_df) + 1)
+                    # The unused fill colour and the disabled filled-area trace were removed:
+                    # the fill colour was never passed to a trace and the area trace was only
+                    # a string literal evaluated on every iteration.
+                    fig.add_trace(go.Scatter(
+                        x=model_df['cumulative_time'],
+                        y=model_df['cumulative_queries'],
+                        mode='lines+markers',
+                        name=model,
+                        line=dict(width=2, color=MODEL_COLORS.get(model, "gray"))
+                    ))
+                fig.update_layout(
+                    title="Cumulative Query Flow Chart 📈",
+                    xaxis_title="Cumulative Time (s)",
+                    yaxis_title="Number of Queries Completed",
+                    template='plotly_dark',
+                    legend_title="Models"
+                )
+                return fig
+            def update_query_rate(selected_models):
+                """Reload the results and redraw the cumulative flow chart."""
+                df = load_data_csv_es()
+                return plot_cumulative_flow(df, selected_models)
+            # RANKING FOR THE TOP 3 MODELS WITH UPDATE FUNCTION
+            def ranking_text(df, selected_models, ranking_type):
+                """Build the markdown podium (top three models) for one ranking criterion.
+
+                Args:
+                    df: results frame with at least ``model`` and ``valid_efficiency_score`` columns.
+                    selected_models: models to include.
+                    ranking_type: ``"valid_efficiency_score"`` (mean, higher is better),
+                        ``"time"`` (sum, lower is better) or ``"metrics"`` (mean of the
+                        averaged metrics, higher is better).
+
+                Returns:
+                    Markdown/HTML string with one line per ranked model.
+
+                Raises:
+                    ValueError: if ``ranking_type`` is not one of the three supported values
+                        (previously this surfaced as an UnboundLocalError).
+                """
+                # Copy the filtered rows so the conversion below does not write into a slice.
+                df = df[df['model'].isin(selected_models)].copy()
+                df['valid_efficiency_score'] = pd.to_numeric(df['valid_efficiency_score'], errors='coerce')
+                if ranking_type == "valid_efficiency_score":
+                    scores = df.groupby('model')['valid_efficiency_score'].mean()
+                    ascending_order = False  # Higher is better
+                elif ranking_type == "time":
+                    scores = df.groupby('model')['time'].sum()
+                    ascending_order = True  # For time, lower is better
+                elif ranking_type == "metrics":
+                    selected_metrics = ["cell_precision", "cell_recall", "execution_accuracy", "tuple_cardinality", "tuple_constraint"]
+                    df = calculate_average_metrics(df, selected_metrics)
+                    scores = df.groupby('model')['avg_metric'].mean()
+                    ascending_order = False  # Higher is better
+                else:
+                    raise ValueError(f"Unsupported ranking type: {ranking_type!r}")
+                # Sort on the rounded numeric values and keep the top three. The " s" suffix for
+                # time is added only when formatting: sorting the suffixed strings would place
+                # "10.0 s" before "9.0 s".
+                top_three = scores.round(2).sort_values(ascending=ascending_order).head(3)
+                unit = " s" if ranking_type == "time" else ""
+                medals = ["🥇", "🥈", "🥉"]
+                lines = [
+                    f"<span style='font-size:18px;'>{medal} {model} ({value}{unit})</span><br>\n"
+                    for medal, (model, value) in zip(medals, top_three.items())
+                ]
+                return "## 🏆 Model Ranking\n" + "".join(lines)
+            def update_ranking_text(selected_models, ranking_type):
+                """Reload the results and rebuild the ranking text."""
+                df = load_data_csv_es()
+                return ranking_text(df, selected_models, ranking_type)
+            # RANKING FOR THE 3 WORST RESULTS WITH UPDATE FUNCTION
+            def worst_cases_text(df, selected_models):
+                """Build the markdown list of the three lowest-scoring cases among the selected models.
+
+                A case is one combination of model, table, test category, question,
+                target query and predicted SQL; its score is the mean ``avg_metric``.
+                """
+                df = df[df['model'].isin(selected_models)]
+                selected_metrics = ["cell_precision", "cell_recall", "execution_accuracy", "tuple_cardinality", "tuple_constraint"]
+                df = calculate_average_metrics(df, selected_metrics)
+                worst_cases_df = df.groupby(['model', 'tbl_name', 'test_category', 'question', 'query', 'predicted_sql'])['avg_metric'].mean().reset_index()
+                # Sort on the exact values, keep three rows, then round through assign():
+                # assigning a column into the result of head() is a chained assignment.
+                worst_cases_top_3 = (
+                    worst_cases_df.sort_values(by="avg_metric", ascending=True)
+                    .head(3)
+                    .assign(avg_metric=lambda frame: frame["avg_metric"].round(2))
+                )
+                medals = ["🥇", "🥈", "🥉"]
+                entries = [
+                    f"<span style='font-size:18px;'><b>{medal} {row['model']} - {row['tbl_name']} - {row['test_category']}</b> ({row['avg_metric']})</span>  \n"
+                    f"<span style='font-size:16px;'>- <b>Question:</b> {row['question']}</span>  \n"
+                    f"<span style='font-size:16px;'>- <b>Original Query:</b> `{row['query']}`</span>  \n"
+                    f"<span style='font-size:16px;'>- <b>Predicted SQL:</b> `{row['predicted_sql']}`</span>  \n\n"
+                    for medal, (_, row) in zip(medals, worst_cases_top_3.iterrows())
+                ]
+                return "## ❌ Top 3 Worst Cases\n" + "".join(entries)
+            def update_worst_cases_text(selected_models):
+                """Reload the results and rebuild the worst-cases text."""
+                df = load_data_csv_es()
+                return worst_cases_text(df, selected_models)
+            # Metric columns offered in the multiselect (all ticked initially)
+            metrics = ["cell_precision", "cell_recall", "execution_accuracy", "tuple_cardinality", "tuple_constraint"]
+            # Radio label -> groupby columns handed to the bar chart
+            group_options = {
+                "Table": ["tbl_name", "model"],
+                "Model": ["model"]
+            }
+            df_initial = load_data_csv_es()
+            models = df_initial['model'].unique().tolist()
+            #with gr.Blocks(theme=gr.themes.Default(primary_hue='blue')) as demo:
+            gr.Markdown("""## 📊 Model Performance Analysis 📊
+            Select one or more metrics to calculate the average and visualize histograms and radar plots.
+            """)
+            # Options selection section
+            with gr.Row():
+                metric_multiselect = gr.CheckboxGroup(choices=metrics, label="Select metrics", value=metrics)
+                model_multiselect = gr.CheckboxGroup(choices=models, label="Select models", value=models)
+                group_radio = gr.Radio(choices=list(group_options.keys()), label="Select grouping", value="Model")
+            # The bar chart starts without a value; the other widgets are given their initial content here
+            output_plot = gr.Plot()
+            query_rate_plot = gr.Plot(value=update_query_rate(models))
+            with gr.Row():
+                with gr.Column(scale=1):
+                    radar_plot = gr.Plot(value=update_radar(models))
+                with gr.Column(scale=1):
+                    ranking_type_radio = gr.Radio(
+                        ["valid_efficiency_score", "time", "metrics"],
+                        label="Choose ranking criteria",
+                        value="valid_efficiency_score"
+                    )
+                    ranking_text_display = gr.Markdown(value=update_ranking_text(models, "valid_efficiency_score"))
+                    worst_cases_display = gr.Markdown(value=update_worst_cases_text(models))
+            # Callback functions for updating charts
+            def on_change(selected_metrics, selected_group, selected_models):
+                """Translate the grouping label into its groupby columns and redraw the bar chart."""
+                grouping_columns = group_options[selected_group]
+                return update_plot(selected_metrics, grouping_columns, selected_models)
+            def on_radar_change(selected_models):
+                """Redraw the radar chart for the given models."""
+                radar_figure = update_radar(selected_models)
+                return radar_figure
+            #metrics_df_out.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
+            # Any change of metrics, grouping or models redraws the bar chart
+            metric_multiselect.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
+            group_radio.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
+            model_multiselect.change(on_change, inputs=[metric_multiselect, group_radio, model_multiselect], outputs=output_plot)
+            # A change of the model selection also refreshes the radar, ranking, worst cases and flow chart
+            model_multiselect.change(update_radar, inputs=model_multiselect, outputs=radar_plot)
+            model_multiselect.change(update_ranking_text, inputs=[model_multiselect, ranking_type_radio], outputs=ranking_text_display)
+            ranking_type_radio.change(update_ranking_text, inputs=[model_multiselect, ranking_type_radio], outputs=ranking_text_display)
+            model_multiselect.change(update_worst_cases_text, inputs=model_multiselect, outputs=worst_cases_display)
+            model_multiselect.change(update_query_rate, inputs=[model_multiselect], outputs=query_rate_plot)
+            reset_data = gr.Button("Back to upload data section")
+            reset_data.click(open_accordion, inputs=gr.State("reset"), outputs=[upload_acc, select_table_acc, select_model_acc, qatch_acc, metrics_acc, default_checkbox, file_input])
+            # Hidden button to force UI refresh on load
+            force_update_button = gr.Button("", visible=False)
+            # State variable to track first load
+            load_trigger = gr.State(value=True)
+            # Function to force initial load
+            def force_update(is_first_load):
+                """Return fresh content for the five dashboard widgets on the first load only.
+
+                The last element of the returned tuple is always ``False``; on later
+                calls the five widget slots receive empty updates.
+                """
+                if not is_first_load:
+                    return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), False
+                return (
+                    update_plot(metrics, group_options["Model"], models),
+                    update_query_rate(models),
+                    update_radar(models),
+                    update_ranking_text(models, "valid_efficiency_score"),
+                    update_worst_cases_text(models),
+                    False  # Change state to prevent continuous reloads
+                )
+            # The invisible button forces chart loading only the first time
+            force_update_button.click(
+                fn=force_update,
+                inputs=[load_trigger],
+                outputs=[output_plot, query_rate_plot, radar_plot, ranking_text_display, worst_cases_display, load_trigger]
+            )
+            # Simulate button click when UI loads
+            # NOTE(review): this opens a second gr.Blocks inside the outer `interface` context and
+            # registers a load event whose outputs were created outside it - confirm that this
+            # event actually fires; `interface.load(...)` may be what was intended.
+            with gr.Blocks() as demo:
+                demo.load(
+                    lambda: force_update(True),
+                    outputs=[output_plot, query_rate_plot, radar_plot, ranking_text_display, worst_cases_display, load_trigger]
+                )
 interface.launch()

style.css CHANGED Viewed

@@ -22,4 +22,9 @@
     display: block;
     margin: 10px auto 0;
     border-radius: 2px;
-}

     display: block;
     margin: 10px auto 0;
     border-radius: 2px;
+}
+#bar_plot, #line_plot {
+    width: 100% !important;
+    max-width: none !important;
+}

test_results.csv ADDED Viewed

	@@ -0,0 +1,101 @@

+model,tbl_name,test_category,question,query,predicted_sql,cell_precision,cell_recall,execution_accuracy,tuple_cardinality,tuple_constraint,time,valid_efficiency_score,tuple_order
+Model_B,Table_2,WHERE,Mostra il prodotto più venduto,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT SUM(sales) FROM orders;,0.15,0.89,0.84,0.92,0.97,13,10,
+Model_C,Table_3,GROUPBY,Quali clienti hanno speso di più?,SELECT * FROM users;,SELECT * FROM customers ORDER BY total_spent DESC;,0.09,0.1,0.02,0.22,0.42,3,4,
+Model_A,Table_3,ORDERBY,Mostra il prodotto più venduto,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.9,0.81,0.18,0.56,0.52,11,11,0.39
+Model_C,Table_3,WHERE,Qual è la media dei prezzi?,SELECT SUM(sales) FROM orders;,SELECT * FROM users;,0.87,0.26,0.83,0.37,0.83,2,1,
+Model_B,Table_3,ORDERBY,Quali clienti hanno speso di più?,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.62,0.49,0.66,0.95,0.68,13,2,0.38
+Model_C,Table_3,JOIN,Qual è la media dei prezzi?,SELECT AVG(price) FROM products;,SELECT * FROM customers ORDER BY total_spent DESC;,0.54,0.2,0.83,0.43,0.64,11,14,
+Model_B,Table_1,ORDERBY,Ordina i clienti per spesa,SELECT SUM(sales) FROM orders;,SELECT AVG(price) FROM products;,0.35,0.12,0.51,0.78,0.94,9,12,0.06
+Model_C,Table_2,SELECT,Qual è la media dei prezzi?,SELECT AVG(price) FROM products;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.96,0.23,0.38,0.26,0.13,11,8,
+Model_C,Table_2,JOIN,Elenca tutti gli utenti,SELECT * FROM customers ORDER BY total_spent DESC;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.5,0.07,0.02,0.82,0.75,2,5,
+Model_A,Table_1,JOIN,Trova il totale delle vendite,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.03,0.81,0.15,0.64,0.98,12,8,
+Model_B,Table_3,JOIN,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT AVG(price) FROM products;,0.95,0.63,0.94,0.31,0.94,12,15,
+Model_A,Table_3,SELECT,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.26,0.81,0.45,0.7,0.77,9,0,
+Model_C,Table_3,WHERE,Ordina i clienti per spesa,SELECT * FROM users;,SELECT * FROM customers ORDER BY total_spent DESC;,0.65,0.22,0.61,0.56,0.84,5,11,
+Model_C,Table_1,WHERE,Trova il totale delle vendite,SELECT * FROM users;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.3,0.83,0.14,0.22,0.7,14,2,
+Model_A,Table_3,JOIN,Elenca tutti gli utenti,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;","SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.66,0.57,0.07,0.93,0.16,15,11,
+Model_C,Table_1,WHERE,Elenca tutti gli utenti,SELECT SUM(sales) FROM orders;,SELECT SUM(sales) FROM orders;,0.39,0.97,0.87,0.99,0.27,13,4,
+Model_A,Table_2,SELECT,Elenca tutti gli utenti,SELECT SUM(sales) FROM orders;,SELECT * FROM users;,0.64,0.44,0.03,0.81,0.43,14,6,
+Model_A,Table_3,GROUPBY,Quali clienti hanno speso di più?,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT * FROM customers ORDER BY total_spent DESC;,0.55,0.32,0.28,0.12,0.79,7,3,
+Model_B,Table_3,WHERE,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT * FROM users;,0.56,0.27,0.34,0.59,0.59,10,15,
+Model_A,Table_2,SELECT,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.6,0.83,0.41,0.28,0.02,10,8,
+Model_C,Table_2,SELECT,Elenca tutti gli utenti,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT AVG(price) FROM products;,0.77,0.5,0.13,0.74,0.36,2,4,
+Model_C,Table_1,ORDERBY,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.5,0.33,0.91,0.05,0.71,3,11,0.83
+Model_C,Table_2,WHERE,Mostra il prodotto più venduto,SELECT SUM(sales) FROM orders;,SELECT AVG(price) FROM products;,0.74,0.62,0.64,0.26,0.05,1,5,
+Model_B,Table_3,JOIN,Qual è la media dei prezzi?,SELECT SUM(sales) FROM orders;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.98,0.73,0.52,0.12,0.72,1,6,
+Model_A,Table_2,WHERE,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT AVG(price) FROM products;,0.9,0.84,0.57,0.86,0.66,10,1,
+Model_A,Table_1,WHERE,Elenca tutti gli utenti,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT SUM(sales) FROM orders;,0.21,0.73,0.97,0.58,0.04,5,4,
+Model_C,Table_3,GROUPBY,Qual è la media dei prezzi?,SELECT * FROM users;,SELECT * FROM users;,0.63,0.51,0.14,0.24,0.62,3,6,
+Model_A,Table_1,ORDERBY,Elenca tutti gli utenti,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.39,0.42,0.88,0.45,0.24,9,1,0.95
+Model_B,Table_3,JOIN,Quali clienti hanno speso di più?,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT SUM(sales) FROM orders;,0.86,0.59,0.53,0.91,0.9,4,8,
+Model_C,Table_3,JOIN,Ordina i clienti per spesa,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM users;,0.86,0.57,0.26,0.47,0.5,13,13,
+Model_B,Table_2,GROUPBY,Mostra il prodotto più venduto,SELECT SUM(sales) FROM orders;,SELECT AVG(price) FROM products;,0.67,0.7,0.1,0.42,0.31,9,14,
+Model_C,Table_3,GROUPBY,Trova il totale delle vendite,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT AVG(price) FROM products;,0.74,0.26,0.64,0.33,0.09,9,7,
+Model_C,Table_2,GROUPBY,Mostra il prodotto più venduto,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.31,0.75,0.73,0.3,0.25,8,10,
+Model_B,Table_2,JOIN,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT * FROM users;,0.83,0.63,0.43,0.0,0.1,13,9,
+Model_C,Table_1,ORDERBY,Qual è la media dei prezzi?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM customers ORDER BY total_spent DESC;,1.0,0.48,0.96,0.45,0.66,4,5,0.8
+Model_A,Table_3,JOIN,Trova il totale delle vendite,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM customers ORDER BY total_spent DESC;,0.97,0.46,0.34,0.57,0.21,15,4,
+Model_C,Table_3,SELECT,Quali clienti hanno speso di più?,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.72,0.02,0.64,0.62,0.83,11,7,
+Model_A,Table_2,GROUPBY,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.16,0.67,0.2,0.62,0.82,9,15,
+Model_C,Table_2,ORDERBY,Quali clienti hanno speso di più?,SELECT SUM(sales) FROM orders;,SELECT AVG(price) FROM products;,0.38,0.13,0.96,0.36,0.9,14,5,0.01
+Model_A,Table_2,GROUPBY,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT * FROM customers ORDER BY total_spent DESC;,0.73,0.77,0.24,0.35,0.77,1,2,
+Model_C,Table_1,JOIN,Mostra il prodotto più venduto,SELECT SUM(sales) FROM orders;,SELECT * FROM users;,0.46,0.51,0.79,0.1,0.87,7,5,
+Model_C,Table_3,WHERE,Ordina i clienti per spesa,SELECT SUM(sales) FROM orders;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.03,0.53,0.5,0.69,0.45,7,3,
+Model_C,Table_3,JOIN,Elenca tutti gli utenti,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT AVG(price) FROM products;,0.27,0.94,0.41,0.07,0.61,7,14,
+Model_A,Table_2,WHERE,Qual è la media dei prezzi?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT SUM(sales) FROM orders;,0.04,0.8,0.59,0.06,0.18,9,10,
+Model_C,Table_3,WHERE,Trova il totale delle vendite,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.99,0.17,0.2,0.74,0.59,3,14,
+Model_B,Table_1,WHERE,Trova il totale delle vendite,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT * FROM users;,0.0,0.39,0.77,0.04,0.5,1,2,
+Model_C,Table_3,SELECT,Ordina i clienti per spesa,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT * FROM customers ORDER BY total_spent DESC;,0.69,0.77,0.07,0.97,0.21,7,4,
+Model_C,Table_2,JOIN,Quali clienti hanno speso di più?,SELECT SUM(sales) FROM orders;,SELECT SUM(sales) FROM orders;,0.05,0.6,0.47,0.08,0.83,10,11,
+Model_B,Table_2,WHERE,Qual è la media dei prezzi?,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT SUM(sales) FROM orders;,0.18,0.8,0.01,0.26,0.79,4,9,
+Model_A,Table_2,WHERE,Quali clienti hanno speso di più?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM users;,0.83,0.69,0.25,0.27,0.73,6,15,
+Model_C,Table_2,GROUPBY,Qual è la media dei prezzi?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM users;,0.8,0.0,0.2,0.11,0.09,6,10,
+Model_A,Table_1,JOIN,Qual è la media dei prezzi?,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT * FROM customers ORDER BY total_spent DESC;,0.33,0.0,0.01,0.65,0.38,12,2,
+Model_B,Table_2,GROUPBY,Trova il totale delle vendite,SELECT AVG(price) FROM products;,SELECT AVG(price) FROM products;,0.3,0.79,0.37,0.66,0.07,2,14,
+Model_A,Table_1,GROUPBY,Mostra il prodotto più venduto,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT AVG(price) FROM products;,0.8,0.21,0.4,0.93,0.61,3,8,
+Model_B,Table_2,GROUPBY,Trova il totale delle vendite,SELECT AVG(price) FROM products;,SELECT * FROM customers ORDER BY total_spent DESC;,0.48,0.8,0.62,0.72,0.64,8,8,
+Model_B,Table_3,ORDERBY,Elenca tutti gli utenti,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT SUM(sales) FROM orders;,0.94,0.61,0.07,0.42,0.74,9,6,0.63
+Model_B,Table_2,WHERE,Ordina i clienti per spesa,SELECT * FROM users;,SELECT AVG(price) FROM products;,0.02,0.63,0.97,0.62,0.34,2,6,
+Model_C,Table_2,WHERE,Trova il totale delle vendite,SELECT AVG(price) FROM products;,SELECT SUM(sales) FROM orders;,0.61,0.87,0.56,0.55,0.11,6,4,
+Model_B,Table_2,WHERE,Elenca tutti gli utenti,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT AVG(price) FROM products;,0.4,0.44,0.72,0.01,0.78,2,1,
+Model_A,Table_3,ORDERBY,Qual è la media dei prezzi?,SELECT SUM(sales) FROM orders;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.89,0.48,0.37,0.8,0.57,2,11,0.83
+Model_A,Table_2,GROUPBY,Trova il totale delle vendite,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;","SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.95,0.51,0.17,0.55,0.06,10,4,
+Model_A,Table_3,JOIN,Qual è la media dei prezzi?,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT * FROM customers ORDER BY total_spent DESC;,0.47,0.56,0.89,0.86,0.06,1,2,
+Model_A,Table_2,ORDERBY,Mostra il prodotto più venduto,SELECT * FROM users;,SELECT * FROM customers ORDER BY total_spent DESC;,0.3,0.5,0.69,0.51,0.07,13,1,0.59
+Model_C,Table_2,GROUPBY,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.81,0.37,0.51,0.6,0.45,3,9,
+Model_B,Table_3,WHERE,Trova il totale delle vendite,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT AVG(price) FROM products;,0.62,0.65,0.26,0.52,0.05,4,3,
+Model_A,Table_1,GROUPBY,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT * FROM users;,0.17,0.28,0.87,0.95,0.81,10,7,
+Model_B,Table_1,GROUPBY,Trova il totale delle vendite,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.2,0.16,0.28,0.95,0.64,9,2,
+Model_C,Table_2,JOIN,Qual è la media dei prezzi?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT SUM(sales) FROM orders;,0.89,0.78,0.56,0.84,0.13,3,11,
+Model_B,Table_1,ORDERBY,Trova il totale delle vendite,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;","SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.91,0.35,0.97,0.99,0.97,12,5,0.9
+Model_A,Table_1,WHERE,Elenca tutti gli utenti,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT SUM(sales) FROM orders;,0.59,0.72,0.77,0.64,0.75,14,2,
+Model_B,Table_2,JOIN,Ordina i clienti per spesa,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT * FROM users;,0.05,0.3,0.37,0.22,0.31,6,6,
+Model_C,Table_3,JOIN,Mostra il prodotto più venduto,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.69,0.34,0.94,0.22,0.94,11,7,
+Model_B,Table_2,JOIN,Qual è la media dei prezzi?,SELECT SUM(sales) FROM orders;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.4,0.79,0.72,0.82,0.98,6,5,
+Model_C,Table_1,ORDERBY,Elenca tutti gli utenti,SELECT * FROM users;,SELECT SUM(sales) FROM orders;,0.57,0.71,0.04,0.32,0.55,12,0,0.43
+Model_A,Table_1,SELECT,Quali clienti hanno speso di più?,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM users;,0.84,0.62,0.32,0.28,0.16,2,5,
+Model_A,Table_3,ORDERBY,Ordina i clienti per spesa,SELECT AVG(price) FROM products;,SELECT * FROM customers ORDER BY total_spent DESC;,0.77,0.81,0.47,0.46,0.82,2,10,0.66
+Model_C,Table_2,ORDERBY,Trova il totale delle vendite,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.86,0.28,0.14,0.75,0.37,13,14,0.25
+Model_A,Table_1,ORDERBY,Qual è la media dei prezzi?,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.9,0.93,0.89,0.88,0.25,13,3,0.92
+Model_C,Table_3,JOIN,Quali clienti hanno speso di più?,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.59,0.9,0.12,0.58,0.69,14,2,
+Model_C,Table_1,JOIN,Quali clienti hanno speso di più?,SELECT SUM(sales) FROM orders;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.72,0.2,0.67,0.36,0.42,5,14,
+Model_A,Table_2,JOIN,Trova il totale delle vendite,SELECT * FROM users;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.17,0.74,0.12,0.4,0.27,11,15,
+Model_A,Table_1,SELECT,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT * FROM customers ORDER BY total_spent DESC;,0.34,0.59,0.8,0.15,0.58,11,3,
+Model_A,Table_1,ORDERBY,Ordina i clienti per spesa,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.4,0.64,0.18,0.22,0.03,15,8,0.33
+Model_C,Table_2,ORDERBY,Qual è la media dei prezzi?,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT * FROM customers ORDER BY total_spent DESC;,0.32,0.31,0.95,0.97,0.21,7,6,0.02
+Model_B,Table_2,JOIN,Elenca tutti gli utenti,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;","SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.37,0.41,0.36,0.3,0.6,11,10,
+Model_A,Table_1,SELECT,Trova il totale delle vendite,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.5,0.88,0.64,0.54,0.63,8,11,
+Model_C,Table_1,GROUPBY,Trova il totale delle vendite,SELECT * FROM users;,SELECT * FROM users;,0.19,0.6,0.4,0.49,0.21,13,3,
+Model_C,Table_3,JOIN,Trova il totale delle vendite,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT * FROM users;,0.4,0.92,0.33,0.19,0.67,13,3,
+Model_B,Table_3,JOIN,Elenca tutti gli utenti,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,SELECT * FROM users;,0.76,0.67,0.51,1.0,0.22,10,14,
+Model_C,Table_1,ORDERBY,Quali clienti hanno speso di più?,SELECT AVG(price) FROM products;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.07,0.72,0.69,0.21,0.15,15,3,0.63
+Model_C,Table_1,GROUPBY,Ordina i clienti per spesa,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.71,0.54,0.24,0.35,0.23,7,2,
+Model_C,Table_1,WHERE,Elenca tutti gli utenti,SELECT AVG(price) FROM products;,SELECT * FROM users;,0.83,0.36,0.52,0.18,0.06,10,7,
+Model_C,Table_2,JOIN,Elenca tutti gli utenti,SELECT * FROM users;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.19,0.01,0.34,0.79,0.29,14,7,
+Model_C,Table_1,GROUPBY,Trova il totale delle vendite,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.14,0.74,0.76,0.88,0.01,5,9,
+Model_A,Table_2,JOIN,Ordina i clienti per spesa,SELECT SUM(sales) FROM orders;,SELECT * FROM users;,0.66,0.36,0.97,0.46,0.85,5,1,
+Model_A,Table_2,ORDERBY,Mostra il prodotto più venduto,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.39,0.8,0.88,0.06,0.84,4,14,0.96
+Model_C,Table_3,GROUPBY,Elenca tutti gli utenti,SELECT * FROM customers ORDER BY total_spent DESC;,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,0.17,0.93,0.64,0.84,0.72,6,8,
+Model_A,Table_3,ORDERBY,Elenca tutti gli utenti,SELECT product FROM sales ORDER BY count DESC LIMIT 1;,"SELECT customer, SUM(amount) FROM payments GROUP BY customer ORDER BY SUM(amount) DESC;",0.55,0.16,0.54,0.87,0.55,8,1,0.94
+Model_B,Table_1,ORDERBY,Ordina i clienti per spesa,SELECT SUM(sales) FROM orders;,SELECT * FROM customers ORDER BY total_spent DESC;,0.92,0.59,0.25,0.57,0.08,2,5,0.47
+Model_C,Table_1,JOIN,Mostra il prodotto più venduto,SELECT * FROM users;,SELECT * FROM customers ORDER BY total_spent DESC;,0.7,0.88,0.65,0.64,0.92,13,13,

utilities.py CHANGED Viewed

@@ -3,8 +3,9 @@ import pandas as pd
 import sqlite3
 import gradio as gr
 import os
-def carica_sqlite(file_path):
     conn = sqlite3.connect(file_path)
     cursor = conn.cursor()
     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
@@ -17,7 +18,11 @@ def carica_sqlite(file_path):
         df = pd.read_sql_query(f"SELECT * FROM {nome_tabella}", conn)
         dfs[nome_tabella] = df
     conn.close()
-    data_output = {'data_frames': dfs,'db': conn}
     return data_output
 # Funzione per leggere un file CSV
@@ -37,7 +42,7 @@ def load_data(data_path : str, db_name : str):
     data_output = {'data_frames': {} ,'db': None}
     table_name = os.path.splitext(os.path.basename(data_path))[0]
     if data_path.endswith(".sqlite") :
-        data_output = carica_sqlite(data_path)
     elif data_path.endswith(".csv"):
         data_output['data_frames'] = {f"{table_name}_table" : carica_csv(data_path)}
     elif data_path.endswith(".xlsx"):

 import sqlite3
 import gradio as gr
 import os
+from qatch.connectors.sqlite_connector import SqliteConnector
+def carica_sqlite(file_path, db_id):
     conn = sqlite3.connect(file_path)
     cursor = conn.cursor()
     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
         df = pd.read_sql_query(f"SELECT * FROM {nome_tabella}", conn)
         dfs[nome_tabella] = df
     conn.close()
+    data_output = {'data_frames': dfs,'db': None}
+    # data_output['db'] = SqliteConnector(
+    #     relative_db_path=file_path,
+    #     db_name=db_id,
+    # )
     return data_output
 # Funzione per leggere un file CSV
     data_output = {'data_frames': {} ,'db': None}
     table_name = os.path.splitext(os.path.basename(data_path))[0]
     if data_path.endswith(".sqlite") :
+        data_output = carica_sqlite(data_path, db_name)
     elif data_path.endswith(".csv"):
         data_output['data_frames'] = {f"{table_name}_table" : carica_csv(data_path)}
     elif data_path.endswith(".xlsx"):

utils_get_db_tables_info.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+import sqlite3
+import re
+def utils_extract_db_schema_as_string(
+    db_id, base_path, normalize=False, sql: str | None = None
+):
+    """
+    Extracts the full schema of an SQLite database into a single string.
+    :param base_path: Base path where the database is located.
+    :param db_id: Path to the SQLite database file.
+    :param normalize: Whether to normalize the schema string.
+    :param sql: Optional SQL query to filter specific tables.
+    :return: Schema of the database as a single string.
+    """
+    #db_path = os.path.join(base_path, db_id, f"{db_id}.sqlite")
+    # Connect to the SQLite database
+    #if not os.path.exists(db_path):
+    #    raise FileNotFoundError(f"Database file not found at: {db_path}")
+    connection = sqlite3.connect(base_path)
+    cursor = connection.cursor()
+    # Get the schema entries based on the provided SQL query
+    schema_entries = _get_schema_entries(cursor, sql)
+    # Combine all schema definitions into a single string
+    schema_string = _combine_schema_entries(schema_entries, normalize)
+    return schema_string
+def _get_schema_entries(cursor, sql):
+    """
+    Retrieves schema entries from the SQLite database.
+    :param cursor: SQLite cursor object.
+    :param sql: Optional SQL query to filter specific tables.
+    :return: List of schema entries.
+    """
+    if sql:
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        tables = [tbl[0] for tbl in cursor.fetchall() if tbl[0].lower() in sql.lower()]
+        if tables:
+            tbl_names = ", ".join(f"'{tbl}'" for tbl in tables)
+            query = f"SELECT sql FROM sqlite_master WHERE type='table' AND name IN ({tbl_names}) AND sql IS NOT NULL;"
+        else:
+            query = "SELECT sql FROM sqlite_master WHERE sql IS NOT NULL;"
+    else:
+        query = "SELECT sql FROM sqlite_master WHERE sql IS NOT NULL;"
+    cursor.execute(query)
+    return cursor.fetchall()
+def _combine_schema_entries(schema_entries, normalize):
+    """
+    Combines schema entries into a single string.
+    :param schema_entries: List of schema entries.
+    :param normalize: Whether to normalize the schema string.
+    :return: Combined schema string.
+    """
+    if not normalize:
+        return "\n".join(entry[0] for entry in schema_entries)
+    return "\n".join(
+        re.sub(
+            r"\s*\)",
+            ")",
+            re.sub(
+                r"\(\s*",
+                "(",
+                re.sub(
+                    r"(`\w+`)\s+\(",
+                    r"\1(",
+                    re.sub(
+                        r"^\s*([^\s(]+)",
+                        r"`\1`",
+                        re.sub(
+                            r"\s+",
+                            " ",
+                            entry[0].replace("CREATE TABLE", "").replace("\t", " "),
+                        ).strip(),
+                    ),
+                ),
+            ),
+        )
+        for entry in schema_entries
+    )