Spaces:
Running
Running
clean up
Browse files- evaluate.py +19 -13
evaluate.py
CHANGED
|
@@ -2,22 +2,23 @@ import os
|
|
| 2 |
import json
|
| 3 |
import subprocess
|
| 4 |
import pandas as pd
|
| 5 |
-
# from sklearn.manifold import TSNE
|
| 6 |
|
| 7 |
from generate import get_solution_file_path, all_models
|
| 8 |
-
from openai import OpenAI
|
| 9 |
import time
|
| 10 |
|
| 11 |
import os
|
| 12 |
import subprocess
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
# cd to the day directory
|
| 23 |
os.chdir(f"day{day:02d}")
|
|
@@ -31,7 +32,6 @@ def evaluate_submission(day: int, model: str):
|
|
| 31 |
print(f"Evaluating {file_path} for day {day} with model {model}")
|
| 32 |
|
| 33 |
# run the solution, and capture the output
|
| 34 |
-
timeout = 60 * 5
|
| 35 |
start_time = time.time()
|
| 36 |
try:
|
| 37 |
result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
|
|
@@ -60,14 +60,17 @@ def get_solution_code(day: int, model: str) -> str:
|
|
| 60 |
return file.read()
|
| 61 |
|
| 62 |
|
| 63 |
-
def extract_solutions(df, output_file = "solutions.json"):
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
solutions = {}
|
| 66 |
for day in range(1, 26):
|
| 67 |
-
sub_df = df[(df.model == "jerpint") & (df.day == day)]
|
| 68 |
-
|
| 69 |
|
|
|
|
| 70 |
day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
|
|
|
|
| 71 |
if len(day_solution) == 0:
|
| 72 |
part1 = "N/A"
|
| 73 |
part2 = "N/A"
|
|
@@ -125,8 +128,11 @@ def evaluate_submissions(all_models, results_file = "results.csv", skip = True):
|
|
| 125 |
|
| 126 |
|
| 127 |
if __name__ == "__main__":
|
|
|
|
| 128 |
all_models["human"] = ["jerpint"]
|
|
|
|
|
|
|
| 129 |
df = evaluate_submissions(all_models, results_file="results.csv")
|
| 130 |
|
| 131 |
-
#
|
| 132 |
-
solutions = extract_solutions(df, output_file="solutions.json")
|
|
|
|
| 2 |
import json
|
| 3 |
import subprocess
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
|
| 6 |
from generate import get_solution_file_path, all_models
|
|
|
|
| 7 |
import time
|
| 8 |
|
| 9 |
import os
|
| 10 |
import subprocess
|
| 11 |
|
| 12 |
|
| 13 |
+
def evaluate_submission(day: int, model: str, timeout = 60 * 5):
|
| 14 |
+
"""Evaluates the python code of a submission for the given day and model.
|
| 15 |
|
| 16 |
+
Returns the result captured from stdout and the total time taken.
|
| 17 |
|
| 18 |
+
Does not score the actual submission (e.g. reward a star); this comes later.
|
| 19 |
+
The timeout (in seconds) halts the program after that amount of time, in case infinite loops arise.
|
| 20 |
+
If errors are produced, they are also returned.
|
| 21 |
+
"""
|
| 22 |
|
| 23 |
# cd to the day directory
|
| 24 |
os.chdir(f"day{day:02d}")
|
|
|
|
| 32 |
print(f"Evaluating {file_path} for day {day} with model {model}")
|
| 33 |
|
| 34 |
# run the solution, and capture the output
|
|
|
|
| 35 |
start_time = time.time()
|
| 36 |
try:
|
| 37 |
result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
|
|
|
|
| 60 |
return file.read()
|
| 61 |
|
| 62 |
|
| 63 |
+
def extract_solutions(df, model: str, output_file = "solutions.json") -> dict:
|
| 64 |
+
"""This will get all solutions produced by the model, and use those as 'ground truth', which can be used to score other models.
|
| 65 |
+
|
| 66 |
+
Results are saved in .json format.
|
| 67 |
+
"""
|
| 68 |
solutions = {}
|
| 69 |
for day in range(1, 26):
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
sub_df = df[(df.model == model) & (df.day == day)]
|
| 72 |
day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
|
| 73 |
+
|
| 74 |
if len(day_solution) == 0:
|
| 75 |
part1 = "N/A"
|
| 76 |
part2 = "N/A"
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
if __name__ == "__main__":
|
| 131 |
+
# Add my submissions to the list of available models, for convenience
|
| 132 |
all_models["human"] = ["jerpint"]
|
| 133 |
+
|
| 134 |
+
# Collects all outputs from running the python code
|
| 135 |
df = evaluate_submissions(all_models, results_file="results.csv")
|
| 136 |
|
| 137 |
+
# Extracts solutions
|
| 138 |
+
solutions = extract_solutions(df, output_file="solutions.json", model = "jerpint")
|