Spaces:

jerpint
/

advent24-llm

Running

App Files Community

jerpint committed on Dec 29, 2024

Commit

84520b3

1 Parent(s): 49c830d

use all days

Browse files

Files changed (1) hide show

app.py +34 -7

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ from evaluate import get_solution_code
 # For now, only evaluate first 9 days
 df = pd.read_csv("results.csv")
-df = df[df.day < 10]
 with open("solutions.json") as f:
     solutions = json.load(f)
@@ -19,6 +18,20 @@ def score_submissions(row):
     score_1 = solution[0] in result
     score_2 = solution[1] in result
     return [score_1, score_2]
@@ -26,6 +39,8 @@ df["scores"] = df.apply(score_submissions, axis=1)
 df["code"] = df.apply(lambda x: get_solution_code(day = x["day"], model=x["model"]), axis=1)
 df["code_md"] = df.code.apply(lambda x: "```python"+x+"```")
 df["part_1"] = df["scores"].apply(lambda x: x[0])
 df["part_2"] = df["scores"].apply(lambda x: x[1])
@@ -33,6 +48,8 @@ df["part_2"] = df["scores"].apply(lambda x: x[1])
 star_summary = {}
 for model in df.model.unique():
     df_model = df[df.model == model]
     silver_stars = df_model.part_1.sum()
     gold_stars = df_model.part_2.sum()
     total_stars = silver_stars + gold_stars
@@ -45,6 +62,16 @@ for model in df.model.unique():
 star_df = pd.DataFrame.from_dict(star_summary, orient="index")
 with gr.Blocks() as demo:
     md = gr.Markdown("Hello!")
     with gr.Tab("Stars"):
@@ -52,15 +79,15 @@ with gr.Blocks() as demo:
     with gr.Tab("Daily"):
         # Parse the info to something more readable
-        df_daily = df[["model", "day", "part_1", "part_2", "total_time"]]
-        df_daily["Part 1"] = df_daily["part_1"].apply(lambda x: "⭐️" if x else "❌")
-        df_daily["Part 2"] = df_daily["part_2"].apply(lambda x: "⭐️" if x else "❌")
-        df_daily["Runtime (s)"] = df_daily["total_time"].apply(lambda x: str(x)[0:6])
         df_daily = df_daily[["model", "day", "Part 1", "Part 2", "Runtime (s)"]]
         gr_df_daily = gr.DataFrame(df_daily.sort_values(by="day"))
-    # with gr.Tab("Code"):
-    #     gr_code_df = gr.DataFrame(df[["model", "day", "code_md", "result"]], datatype=["str", "str", "markdown", "str"])
 demo.launch()

 # For now, only evaluate first 9 days
 df = pd.read_csv("results.csv")
 with open("solutions.json") as f:
     solutions = json.load(f)
     score_1 = solution[0] in result
     score_2 = solution[1] in result
+    # if solution[0] == "N/A":
+    #     score_1 = "N/A"
+    # else:
+    #     score_1 = solution[0] in result
+    # if not score_1:
+    #     score_2 = False  # Can't get to level 2 without level 1
+    # elif solution[1] == "N/A":
+    #     score_2 =  "N/A"  # Won't evaluate an answer i dont have
+    # else:
+    #     score_2 = solution[1] in result
     return [score_1, score_2]
 df["code"] = df.apply(lambda x: get_solution_code(day = x["day"], model=x["model"]), axis=1)
 df["code_md"] = df.code.apply(lambda x: "```python"+x+"```")
+df["Runtime (s)"] = df["total_time"].apply(lambda x: str(x)[0:6])
 df["part_1"] = df["scores"].apply(lambda x: x[0])
 df["part_2"] = df["scores"].apply(lambda x: x[1])
 star_summary = {}
 for model in df.model.unique():
     df_model = df[df.model == model]
+    # silver_stars = sum([s for s in df_model.part_1.to_list() if not isinstance(s, str)])
+    # gold_stars = sum([s for s in df_model.part_2.to_list() if not isinstance(s, str)])
     silver_stars = df_model.part_1.sum()
     gold_stars = df_model.part_2.sum()
     total_stars = silver_stars + gold_stars
 star_df = pd.DataFrame.from_dict(star_summary, orient="index")
+def score_to_string(s):
+    return "⭐️" if s else "❌"
+    # if s == True:
+    #     return "⭐️"
+    # elif s == False:
+    #     return "❌"
+    # else:
+    #     return "N/A"
 with gr.Blocks() as demo:
     md = gr.Markdown("Hello!")
     with gr.Tab("Stars"):
     with gr.Tab("Daily"):
         # Parse the info to something more readable
+        df_daily = df[["model", "day", "part_1", "part_2", "Runtime (s)"]]
+        df_daily["Part 1"] = df_daily["part_1"].apply(score_to_string)
+        df_daily["Part 2"] = df_daily["part_2"].apply(score_to_string)
         df_daily = df_daily[["model", "day", "Part 1", "Part 2", "Runtime (s)"]]
         gr_df_daily = gr.DataFrame(df_daily.sort_values(by="day"))
+    with gr.Tab("Outputs"):
+        # gr_code_df = gr.DataFrame(df[["model", "day", "code_md", "result"]], datatype=["str", "str", "markdown", "str"])
+        gr_code_df = gr.DataFrame(df[["model", "day", "Runtime (s)", "result"]], datatype=["str", "str", "str"])
 demo.launch()