Files changed (3)
  1. app.py +31 -30
  2. concatenated_output.csv +0 -0
  3. tables_dict_beaver.pkl +3 -0
app.py CHANGED
@@ -28,7 +28,9 @@ import utilities as us
 # def wrapper(*args, **kwargs):
 #     return func(*args, **kwargs)
 # return wrapper
-pnp_path = "evaluation_p_np_metrics.csv"
+#pnp_path = os.path.join("data", "evaluation_p_np_metrics.csv")
+pnp_path = "concatenated_output.csv"
+PATH_PKL_TABLES = 'tables_dict_beaver.pkl'
 
 js_func = """
 function refresh() {
@@ -57,8 +59,11 @@ models_path ="models.csv"
 df_current = df_default.copy()
 
 description = """## πŸ“Š Comparison of Proprietary and Non-Proprietary Databases
-### ➀ **Proprietary** (πŸ’° Economic, πŸ₯ Medical, πŸ’³ Financial, πŸ“‚ Miscellaneous)
-### ➀ **Non-Proprietary** (πŸ•·οΈ Spider 1.0)"""
+### ➀ **Proprietary** :
+### &ensp;&ensp;&ensp; β‡’ Economic πŸ’°, Medical πŸ₯, Financial πŸ’³, Miscellaneous πŸ“‚
+### &ensp;&ensp;&ensp; β‡’ BEAVER (FAC BUILDING ADDRESS 🏒 , TIME QUARTER ⏱️)
+### ➀ **Non-Proprietary**
+### &ensp;&ensp;&ensp; β‡’ Spider 1.0 πŸ•·οΈ"""
 prompt_default = "Translate the following question in SQL code to be executed over the database to fetch the answer.\nReturn the sql code in ```sql ```\nQuestion\n{question}\nDatabase Schema\n{db_schema}\n"
 
 input_data = {
@@ -134,7 +139,7 @@ def load_data(file, path, use_default):
 #input_data["data_path"] = os.path.join(".", "data", "spider_databases", "defeault.sqlite")
 #input_data["db_name"] = "default"
 #input_data["data"]['db'] = SqliteConnector(relative_db_path=input_data["data_path"], db_name=input_data["db_name"])
-input_data["data"]['data_frames'] = us.load_tables_dict_from_pkl('tables_dict.pkl')
+input_data["data"]['data_frames'] = us.load_tables_dict_from_pkl(PATH_PKL_TABLES)
 return input_data["data"]['data_frames']
 
 selected_inputs = sum([file is not None, bool(path), use_default])
@@ -324,18 +329,23 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 available_tables = []
 for name, df in data.items():
     df_real = data_frames.get(name, None)
-    if df_real is not None and df_real.shape[1] > 15:
-        excluded_tables.append(name)
+    if input_data['input_method'] != "default":
+        if df_real is not None and df_real.shape[1] > 15:
+            excluded_tables.append(name)
+        else:
+            available_tables.append(name)
     else:
         available_tables.append(name)
 
-if input_data['input_method'] == "default" or len(available_tables) < 6:
+
+if input_data['input_method'] == "default":
+    table_names.append("All")
+    excluded_tables = []
+elif len(available_tables) < 6:
     table_names.append("All")
 
 table_names.extend(available_tables)
-
-# Prepare the text to display
-if excluded_tables:
+if excluded_tables and input_data['input_method'] != "default" :
     excluded_text = "<b>⚠️ The following tables have more than 15 columns and cannot be selected:</b><br>" + "<br>".join(f"- {t}" for t in excluded_tables)
     excluded_visible = True
 else:
@@ -359,7 +369,10 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 available_tables = []
 for name, df in data.items():
     df_real = data_frames.get(name)
-    if df_real is not None and df_real.shape[1] <= 15:
+    if input_data['input_method'] != "default" :
+        if df_real is not None and df_real.shape[1] <= 15:
+            available_tables.append(name)
+    else:
         available_tables.append(name)
 
 input_method = input_data['input_method']
@@ -413,6 +426,8 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 available_tables = list(data.keys()) # Actually available names
 if "All" in selected_tables:
     selected_tables = available_tables
+if (input_data['input_method'] != "default") : selected_tables = [t for t in selected_tables if len(data[t].columns) <= 15]
+
 input_data['data']['selected_tables'] = selected_tables
 return gr.update(value=", ".join(selected_tables), visible=False)
 return gr.update(value="", visible=False)
@@ -827,14 +842,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 
 with gr.Tabs() as model_tabs:
     tab_dict = {}
-
-    # for model, model_name in zip(model_list, model_names):
-    # with gr.TabItem(model_name, visible=(model in input_data["models"])) as tab:
-    # gr.Markdown(f"**Results for {model_name}**")
-    # tab_dict[model] = tab
-    # dataframe_per_model[model] = gr.DataFrame()
-    #model_mapping = dict(zip(model_list, model_names))
-    #model_mapping_reverse = dict(zip(model_names, model_list))
+
     for model, model_name in zip(model_list, model_names):
         with gr.TabItem(model_name, visible=(model in input_data["models"])) as tab:
             gr.Markdown(f"**Results for {model}**")
@@ -842,7 +850,6 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
             dataframe_per_model[model] = gr.DataFrame()
             # download_pred_model = gr.DownloadButton(label="Download Prediction per Model", visible=False)
 
-
 evaluation_loading = gr.Markdown()
 
 def change_tab():
@@ -890,11 +897,7 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
     fn=lambda: gr.update(visible=False),
     outputs=[download_metrics]
 )
-#TODO WHY?
-# download_metrics.click(
-#     fn=lambda: gr.update(open=True, visible=True),
-#     outputs=[download_metrics]
-# )
+
 def refresh():
     global reset_flag
     reset_flag = True
@@ -941,12 +944,12 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 ####################################
 
 def load_data_csv_es():
-    #return pd.read_csv(data_path)
-    #print("---------------->",metrics_df_out)
 
     if input_data["input_method"]=="default":
         df = pd.read_csv(pnp_path)
         df = df[df['model'].isin(input_data["models"])]
+        df = df[df['tbl_name'].isin(input_data["data"]["selected_tables"])]
+
         df['model'] = df['model'].replace('DeepSeek-R1-Distill-Llama-70B', 'DS-Llama3 70B')
         df['model'] = df['model'].replace('gpt-3.5', 'GPT-3.5')
         df['model'] = df['model'].replace('gpt-4o-mini', 'GPT-4o-mini')
@@ -1150,7 +1153,6 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
 
 avg_metrics = df.groupby(["db_category", "model"])['avg_metric'].mean().reset_index()
 avg_metrics['text_label'] = avg_metrics['avg_metric'].apply(lambda x: f'{x:.2f}')
-#MIAO
 fig = px.bar(
     avg_metrics,
     x='db_category',
@@ -1277,14 +1279,13 @@ with gr.Blocks(theme='shivi/calm_seafoam', css_paths='style.css', js=js_func) as
     df = load_data_csv_es()
     return plot_metric_propietary(df, radio_metric, qatch_selected_metrics, external_selected_metric, selected_models)
 
-
 # BAR CHART FOR PROPIETARY DATASET WITH AVERAGE METRICS WITH UPDATE FUNCTION
 
 def lollipop_propietary(selected_models):
     df = load_data_csv_es()
 
     # Filter only the relevant categories
-    target_cats = ["Spider", "Economic", "Financial", "Medical", "Miscellaneous"]
+    target_cats = ["Spider", "Economic", "Financial", "Medical", "Miscellaneous", "Beaver"]
     df = df[df['db_category'].isin(target_cats)]
     df = df[df['model'].isin(selected_models)]
 
 
concatenated_output.csv ADDED
The diff for this file is too large to render. See raw diff
 
tables_dict_beaver.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e4fb82cd8b2e985ab794f114d826283bc46a2e3df212d46ccf880ca32b44402
+size 25681
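
Note: `utilities.py` is not part of this diff, so the helper below is only a minimal sketch of what `us.load_tables_dict_from_pkl` presumably does, inferred from how app.py uses its return value (a dict mapping table names to pandas DataFrames whose column count is checked against the 15-column limit). The names and the `__main__` usage are illustrative assumptions, not code from this repository.

```python
# Hypothetical sketch (not from this repo): load a pickled
# {table_name: DataFrame} dictionary such as the new tables_dict_beaver.pkl.
import pickle
import pandas as pd

def load_tables_dict_from_pkl(path: str) -> dict[str, pd.DataFrame]:
    with open(path, "rb") as f:
        return pickle.load(f)

if __name__ == "__main__":
    tables = load_tables_dict_from_pkl("tables_dict_beaver.pkl")
    # Mirror the selection rule introduced above: when the input method is not
    # "default", tables with more than 15 columns cannot be selected.
    selectable = [name for name, df in tables.items() if df.shape[1] <= 15]
    print(selectable)
```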