Spaces:
Running
Running
skip excluded tasks
Browse files- app.py +4 -1
- lighteval_to_duckdb.py +72 -0
- metrics.py +6 -1
app.py
CHANGED
@@ -189,7 +189,10 @@ def all_challenges_view():
|
|
189 |
|
190 |
def select_table_item(evt: gr.SelectData):
|
191 |
model_index = evt.index[1]
|
192 |
-
challenge_id = evt.index[0]
|
|
|
|
|
|
|
193 |
model_name = model_columns[model_index] if model_index in valid_model_indices else default_model
|
194 |
return (model_name, challenge_id)
|
195 |
|
|
|
189 |
|
190 |
def select_table_item(evt: gr.SelectData):
    """Translate a table selection event into ``(model_name, challenge_id)``.

    ``evt.index[0]`` is the selected row and ``evt.index[1]`` the selected
    column. The row is looked up positionally in ``relabelled_df`` to read
    its ``'ID'`` value, so the returned challenge id is the ID stored in the
    table rather than the row position itself. The column is mapped to a
    model name through ``model_columns`` when it is one of
    ``valid_model_indices``; any other column yields ``default_model``.
    """
    row_index = evt.index[0]
    model_index = evt.index[1]

    # Positional row -> challenge ID as stored in the DataFrame's 'ID' column.
    challenge_id = relabelled_df.iloc[row_index]['ID']

    if model_index in valid_model_indices:
        model_name = model_columns[model_index]
    else:
        model_name = default_model

    return (model_name, challenge_id)
|
198 |
|
lighteval_to_duckdb.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import duckdb
|
2 |
+
from pathlib import Path
|
3 |
+
import argparse
|
4 |
+
import re
|
5 |
+
import ast
|
6 |
+
from datasets import load_dataset
|
7 |
+
|
8 |
+
def _parent_dir_from_path(parquet_path: Path) -> str:
    """Return the path component that directly follows ``results/details``.

    Works on path components rather than on a ``"/results/details/"``
    substring, so relative paths (``results/details/<name>/...``) and
    non-POSIX separators are handled as well.

    Raises:
        ValueError: if the path has no ``results/details/<name>`` sequence.
    """
    parts = parquet_path.parts
    for first, second, following in zip(parts, parts[1:], parts[2:]):
        if first == "results" and second == "details":
            return following
    raise ValueError(
        f"Cannot derive parent_dir: {parquet_path} does not contain "
        "'results/details/<name>'"
    )


def _first_prediction(raw_predictions: str) -> str:
    """Parse a stringified predictions literal and return its first string.

    The first element of the parsed literal is unwrapped repeatedly (taking
    element ``0`` each time) until a ``str`` is reached.
    """
    # ast.literal_eval only evaluates Python literals; it does not run code.
    prediction = ast.literal_eval(raw_predictions)[0]
    while not isinstance(prediction, str):
        prediction = prediction[0]
    return prediction


def export_to_duckdb(conn, results_dir: Path):
    """Load every ``*.parquet`` file under ``results_dir`` into ``completions``.

    Creates the ``completions`` table if it does not exist, inserts one row
    per record of every Parquet file found recursively below ``results_dir``,
    commits, and prints the number of inserted rows.

    Args:
        conn: Open DuckDB connection; must provide ``execute`` and ``commit``.
        results_dir: Directory searched recursively for ``*.parquet`` files.
            Each file's path must contain ``results/details/<name>``;
            ``<name>`` is stored in the ``parent_dir`` column.

    Raises:
        ValueError: if a Parquet file's path has no ``results/details/<name>``
            part. This is checked before the file is loaded.
    """
    # SQL schema (matching results.completions)
    create_string = """CREATE TABLE IF NOT EXISTS completions (
        parent_dir TEXT NOT NULL,
        prompt_id INT NOT NULL,
        prompt TEXT NOT NULL,
        completion TEXT NOT NULL,
        count INT NOT NULL,
        temperature FLOAT NOT NULL,
        top_p FLOAT NOT NULL,
        max_tokens INT NOT NULL,
        PRIMARY KEY (parent_dir, prompt_id, prompt, completion)
    )"""
    conn.execute(create_string)

    # Sampling metadata is not read from the Parquet records; these
    # placeholder values only satisfy the NOT NULL columns of the schema.
    count = 0
    temperature = 0.0
    top_p = 0.0
    max_tokens = 2048

    created = 0  # Counter for inserted rows
    for parquet_path in results_dir.rglob("*.parquet"):
        # parent_dir depends only on the file, so compute it once per file
        # instead of once per row.
        parent_dir = _parent_dir_from_path(parquet_path)
        details = load_dataset(
            "parquet", data_files=str(parquet_path), split="train"
        )

        for completion_item in details:
            # 'specifics' is stored as the string form of a dict.
            specifics = ast.literal_eval(completion_item['specifics'])
            prompt_id = specifics.get('ID', -1)
            prompt = completion_item.get("full_prompt", "")
            completion = _first_prediction(completion_item.get("predictions", ""))

            conn.execute(
                "INSERT INTO completions VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                (parent_dir, prompt_id, prompt, completion, count, temperature, top_p, max_tokens),
            )
            created += 1

    conn.commit()
    print(f"Created {created} rows.")
|
52 |
+
|
53 |
+
|
54 |
+
def to_duckdb(db_path: str, results_dir: Path):
    """Open the DuckDB database at ``db_path`` and export ``results_dir`` into it.

    Args:
        db_path: Path of the DuckDB database file passed to ``duckdb.connect``.
        results_dir: Directory handed to ``export_to_duckdb``.

    The connection is closed even when the export raises, so a failed run
    does not leave the database file held open by this process.
    """
    conn = duckdb.connect(db_path)
    try:
        export_to_duckdb(conn, results_dir)
    finally:
        conn.close()
|
59 |
+
|
60 |
+
def main():
    """Parse ``db_path`` and ``results_dir`` from the command line and run the export."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "db_path",
        type=str,
        help="Path to DuckDB database file",
    )
    cli.add_argument(
        "results_dir",
        type=Path,
        help="Path to results dir",
    )
    options = cli.parse_args()
    to_duckdb(options.db_path, options.results_dir)
|
67 |
+
|
68 |
+
if __name__ == "__main__":
|
69 |
+
main()
|
70 |
+
|
71 |
+
#results_dir = "/mnt/ssd/aryawu/lighteval/results/details"
|
72 |
+
# Usage: python lighteval_to_duckdb.py "/mnt/ssd/aryawu/lighteval/results.duckdb" "/mnt/ssd/aryawu/lighteval/results/details"
|
metrics.py
CHANGED
@@ -60,7 +60,12 @@ def _wrap_text(text: str, width: int) -> str:
|
|
60 |
def load_results():
|
61 |
conn = duckdb.connect(":memory:")
|
62 |
conn.execute("ATTACH DATABASE 'results.duckdb' AS results (READ_ONLY)")
|
63 |
-
conn.execute("CREATE TABLE challenges as SELECT * FROM 'puzzles_cleaned.csv'")
|
|
|
|
|
|
|
|
|
|
|
64 |
conn.create_function("check_answer", _check_answer)
|
65 |
conn.create_function("clip_text", _clip_text)
|
66 |
conn.create_function("wrap_text", _wrap_text)
|
|
|
60 |
def load_results():
|
61 |
conn = duckdb.connect(":memory:")
|
62 |
conn.execute("ATTACH DATABASE 'results.duckdb' AS results (READ_ONLY)")
|
63 |
+
# conn.execute("CREATE TABLE challenges as SELECT * FROM 'puzzles_cleaned.csv'")
|
64 |
+
conn.execute("""
|
65 |
+
CREATE TABLE challenges AS
|
66 |
+
SELECT * FROM 'puzzles_cleaned.csv'
|
67 |
+
WHERE Warnings IS NULL OR Warnings NOT LIKE '%(E)%'
|
68 |
+
""")
|
69 |
conn.create_function("check_answer", _check_answer)
|
70 |
conn.create_function("clip_text", _clip_text)
|
71 |
conn.create_function("wrap_text", _wrap_text)
|