Spaces:

nuprl
/

reasoning-weekly

Running

App Files Files

Aryarya committed on Mar 5

Commit

099c250

1 Parent(s): 479b4ac

x

Browse files

Files changed (1) hide show

metrics.py +0 -37

metrics.py CHANGED Viewed

@@ -100,11 +100,6 @@ def load_results_sample_one_only():
     WHERE rn = 1;
     """
     conn.execute(query).fetchall()
-    # #print how how many rows are in the table
-    # print(conn.execute("SELECT COUNT(*) FROM sampled").fetchall())
-    # #describe the sampled table
-    # print(conn.execute("DESCRIBE sampled").fetchall())
     conn.execute("""
         CREATE TABLE challenges AS
         SELECT * FROM 'puzzles_cleaned.csv'
@@ -218,38 +213,6 @@ def accuracy_by_model(conn):
             AnswerCheck
     """)
-def accuracy_by_model_only_one(conn):
-    query = """
-    WITH FirstResponses AS (
-        SELECT
-            parent_dir AS model,
-            prompt_id,
-            completion,
-            count,
-            ROW_NUMBER() OVER (PARTITION BY parent_dir, prompt_id) AS rn
-        FROM results.completions
-        WHERE parent_dir = 'completions-r1_cursor_hosted'  -- Only consider rows where parent_dir is 'r1_cursor_hosted'
-    ),
-    AnswerCheck AS (
-        SELECT
-            fr.model,
-            SUM(fr.count) AS total,
-            SUM(fr.count * CAST(check_answer(fr.completion, c.answer) AS INTEGER)) AS correct
-        FROM FirstResponses fr
-        JOIN challenges c ON fr.prompt_id = c.ID
-        WHERE fr.rn = 1  -- Select only the first response per model per prompt
-        GROUP BY fr.model
-    )
-    SELECT
-        model,
-        total,
-        correct,
-        ROUND(correct / total, 2) AS accuracy
-    FROM AnswerCheck;
-    """
-    return conn.sql(query)
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--by-model-and-time", action="store_true")

     WHERE rn = 1;
     """
     conn.execute(query).fetchall()
     conn.execute("""
         CREATE TABLE challenges AS
         SELECT * FROM 'puzzles_cleaned.csv'
             AnswerCheck
     """)
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--by-model-and-time", action="store_true")