update populate
Browse files- src/populate.py +42 -1
src/populate.py
CHANGED
@@ -3,8 +3,10 @@
|
|
3 |
import os
|
4 |
import pandas as pd
|
5 |
import json
|
|
|
6 |
|
7 |
-
from src.display.utils import COLUMNS, EVAL_COLS
|
|
|
8 |
|
9 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|
10 |
# Initialize an empty DataFrame
|
@@ -58,3 +60,42 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
|
|
58 |
pending_df = df[df['status'] == 'pending']
|
59 |
|
60 |
return finished_df, running_df, pending_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
import pandas as pd
|
5 |
import json
|
6 |
+
import random
|
7 |
|
8 |
+
from src.display.utils import COLUMNS, EVAL_COLS, Tasks
|
9 |
+
from src.envs import EVAL_RESULTS_PATH, FIXED_QUESTIONS_FILE # Define FIXED_QUESTIONS_FILE in envs.py
|
10 |
|
11 |
def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
|
12 |
# Initialize an empty DataFrame
|
|
|
60 |
pending_df = df[df['status'] == 'pending']
|
61 |
|
62 |
return finished_df, running_df, pending_df
|
63 |
+
|
64 |
+
def preselect_fixed_questions(dataset_path, num_questions_per_subject=30, fixed_questions_file='fixed_questions.json'):
    """
    Preselect a fixed number of questions per subject and save them to a JSON file.

    The dataset is read as CSV and must contain a 'Subject' column. For every
    member of ``Tasks``, the rows whose 'Subject' equals ``task.value.benchmark``
    are sampled with a fixed seed, so repeated runs select the same questions.
    The result is a mapping ``{subject: [row_dict, ...]}`` written as JSON inside
    ``EVAL_RESULTS_PATH``.

    Args:
        dataset_path: Path to the CSV dataset.
        num_questions_per_subject: Number of rows to sample for each subject.
        fixed_questions_file: Name of the JSON output file, joined onto
            ``EVAL_RESULTS_PATH``.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        ValueError: If the dataset has no 'Subject' column, or a subject has
            fewer rows than ``num_questions_per_subject``.
    """
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset file not found at {dataset_path}")

    dataset = pd.read_csv(dataset_path)

    # Fail with a clear message instead of a bare KeyError on the filter below.
    if 'Subject' not in dataset.columns:
        raise ValueError(f"Dataset at {dataset_path} has no 'Subject' column")

    fixed_questions = {}

    for task in Tasks:
        subject = task.value.benchmark
        subject_questions = dataset[dataset['Subject'] == subject]

        if len(subject_questions) < num_questions_per_subject:
            raise ValueError(f"Not enough questions for subject '{subject}'. Required: {num_questions_per_subject}, Available: {len(subject_questions)}")

        # Fixed random_state keeps the selection reproducible across runs.
        selected_questions = subject_questions.sample(n=num_questions_per_subject, random_state=42)

        # Missing CSV cells are read as NaN, which json.dump would emit as the
        # non-standard token `NaN`; map them to None so the file contains `null`.
        selected_questions = selected_questions.astype(object).where(selected_questions.notna(), None)
        fixed_questions[subject] = selected_questions.to_dict(orient='records')

    # Save fixed questions to a JSON file, creating the results directory if needed.
    output_path = os.path.join(EVAL_RESULTS_PATH, fixed_questions_file)
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(fixed_questions, f, indent=4)

    # Report the real location of the file, not just its bare name.
    print(f"Fixed questions preselected and saved to {output_path}")
|
93 |
+
|
94 |
+
if __name__ == "__main__":
    # Example usage: point DATASET_PATH at the real dataset CSV before running.
    DATASET_PATH = os.path.join(EVAL_RESULTS_PATH, "your_dataset.csv")  # Update with your actual dataset file

    # FIXED_QUESTIONS_FILE is imported from src.envs at the top of this module.
    # It is deliberately not rebound here, so the script writes to the same
    # file name the rest of the project is configured with.
    preselect_fixed_questions(DATASET_PATH, num_questions_per_subject=30, fixed_questions_file=FIXED_QUESTIONS_FILE)
|