Aryarya committed on
Commit
099c250
·
1 Parent(s): 479b4ac
Files changed (1) hide show
  1. metrics.py +0 -37
metrics.py CHANGED
@@ -100,11 +100,6 @@ def load_results_sample_one_only():
100
  WHERE rn = 1;
101
  """
102
  conn.execute(query).fetchall()
103
- # #print how how many rows are in the table
104
- # print(conn.execute("SELECT COUNT(*) FROM sampled").fetchall())
105
- # #describe the sampled table
106
- # print(conn.execute("DESCRIBE sampled").fetchall())
107
-
108
  conn.execute("""
109
  CREATE TABLE challenges AS
110
  SELECT * FROM 'puzzles_cleaned.csv'
@@ -218,38 +213,6 @@ def accuracy_by_model(conn):
218
  AnswerCheck
219
  """)
220
 
221
- def accuracy_by_model_only_one(conn):
222
- query = """
223
- WITH FirstResponses AS (
224
- SELECT
225
- parent_dir AS model,
226
- prompt_id,
227
- completion,
228
- count,
229
- ROW_NUMBER() OVER (PARTITION BY parent_dir, prompt_id) AS rn
230
- FROM results.completions
231
- WHERE parent_dir = 'completions-r1_cursor_hosted' -- Only consider rows where parent_dir is 'r1_cursor_hosted'
232
- ),
233
- AnswerCheck AS (
234
- SELECT
235
- fr.model,
236
- SUM(fr.count) AS total,
237
- SUM(fr.count * CAST(check_answer(fr.completion, c.answer) AS INTEGER)) AS correct
238
- FROM FirstResponses fr
239
- JOIN challenges c ON fr.prompt_id = c.ID
240
- WHERE fr.rn = 1 -- Select only the first response per model per prompt
241
- GROUP BY fr.model
242
- )
243
- SELECT
244
- model,
245
- total,
246
- correct,
247
- ROUND(correct / total, 2) AS accuracy
248
- FROM AnswerCheck;
249
- """
250
- return conn.sql(query)
251
-
252
-
253
  def main():
254
  parser = argparse.ArgumentParser()
255
  parser.add_argument("--by-model-and-time", action="store_true")
 
100
  WHERE rn = 1;
101
  """
102
  conn.execute(query).fetchall()
 
 
 
 
 
103
  conn.execute("""
104
  CREATE TABLE challenges AS
105
  SELECT * FROM 'puzzles_cleaned.csv'
 
213
  AnswerCheck
214
  """)
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def main():
217
  parser = argparse.ArgumentParser()
218
  parser.add_argument("--by-model-and-time", action="store_true")