tdoehmen committed on
Commit
3445f6a
·
1 Parent(s): 470a9a5

save just the eval main results

Browse files
Files changed (1) hide show
  1. evaluation_logic.py +20 -1
evaluation_logic.py CHANGED
@@ -57,15 +57,34 @@ def save_prediction(inference_api, model_name, prompt_format, question, generate
57
  def save_evaluation(inference_api, model_name, prompt_format, metrics):
58
  evaluation_file = evaluation_folder / f"evaluation_{file_uuid}.json"
59
  evaluation_folder.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  with evaluation_scheduler.lock:
61
  with evaluation_file.open("a") as f:
62
  json.dump({
63
  "inference_api": inference_api,
64
  "model_name": model_name,
65
  "prompt_format": prompt_format,
66
- "metrics": metrics,
67
  "timestamp": datetime.now().isoformat()
68
  }, f)
 
69
 
70
  def run_prediction(inference_api, model_name, prompt_format, output_file):
71
  dataset_path = str(eval_dir / "data/dev.json")
 
57
def save_evaluation(inference_api, model_name, prompt_format, metrics):
    """Append a simplified evaluation record to the shared results file.

    Only the per-category execution metrics are persisted (the full
    ``metrics`` payload is reduced to counts and execution accuracy),
    written as one JSON object per line (NDJSON) so repeated appends
    stay parseable.

    Args:
        inference_api: Name of the inference API used for the run.
        model_name: Identifier of the evaluated model.
        prompt_format: Prompt format label used during inference.
        metrics: Evaluation results; the ``'exec'`` section, when
            present, maps category names to dicts with ``'count'`` and
            ``'exec'`` (execution accuracy) entries.
    """
    evaluation_file = evaluation_folder / f"evaluation_{file_uuid}.json"
    evaluation_folder.mkdir(parents=True, exist_ok=True)

    # Extract only the category-specific execution metrics.
    categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
    # Tolerate a metrics payload without an 'exec' section: every
    # category then falls through to the explicit zero record below,
    # instead of raising KeyError on the section lookup.
    exec_metrics = metrics.get('exec', {})
    simplified_metrics = {}

    for category in categories:
        if category in exec_metrics:
            category_metrics = exec_metrics[category]
            simplified_metrics[category] = {
                'count': category_metrics['count'],
                'execution_accuracy': category_metrics['exec']
            }
        else:
            # Record absent categories explicitly so downstream readers
            # always see a uniform schema.
            simplified_metrics[category] = {
                'count': 0,
                'execution_accuracy': 0.0
            }

    # Serialize under the scheduler lock: multiple workers append to the
    # same file, and interleaved writes would corrupt the NDJSON stream.
    with evaluation_scheduler.lock:
        with evaluation_file.open("a") as f:
            json.dump({
                "inference_api": inference_api,
                "model_name": model_name,
                "prompt_format": prompt_format,
                "category_metrics": simplified_metrics,
                "timestamp": datetime.now().isoformat()
            }, f)
            f.write('\n')  # one record per line (newline-delimited JSON)
88
 
89
  def run_prediction(inference_api, model_name, prompt_format, output_file):
90
  dataset_path = str(eval_dir / "data/dev.json")