Omartificial-Intelligence-Space committed on
Commit
21cb2aa
·
verified ·
1 Parent(s): 83fc769

update populate

Browse files
Files changed (1) hide show
  1. src/populate.py +42 -1
src/populate.py CHANGED
@@ -3,8 +3,10 @@
3
  import os
4
  import pandas as pd
5
  import json
 
6
 
7
- from src.display.utils import COLUMNS, EVAL_COLS
 
8
 
9
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
10
  # Initialize an empty DataFrame
@@ -58,3 +60,42 @@ def get_evaluation_queue_df(eval_requests_path, eval_cols):
58
  pending_df = df[df['status'] == 'pending']
59
 
60
  return finished_df, running_df, pending_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import os
4
  import pandas as pd
5
  import json
6
+ import random
7
 
8
+ from src.display.utils import COLUMNS, EVAL_COLS, Tasks
9
+ from src.envs import EVAL_RESULTS_PATH, FIXED_QUESTIONS_FILE # Define FIXED_QUESTIONS_FILE in envs.py
10
 
11
  def get_leaderboard_df(eval_results_path, eval_requests_path, cols, benchmark_cols):
12
  # Initialize an empty DataFrame
 
60
  pending_df = df[df['status'] == 'pending']
61
 
62
  return finished_df, running_df, pending_df
63
+
64
def preselect_fixed_questions(dataset_path, num_questions_per_subject=30, fixed_questions_file='fixed_questions.json'):
    """
    Preselect a fixed number of questions per subject and save them to a JSON file.

    The dataset is read as CSV and must contain a 'Subject' column. For every
    member of ``Tasks``, the rows whose 'Subject' equals ``task.value.benchmark``
    are sampled with a fixed seed, so repeated runs over the same dataset select
    the same rows. The result is a mapping ``{subject: [row_dict, ...]}`` written
    as JSON to ``EVAL_RESULTS_PATH`` joined with ``fixed_questions_file``.

    Args:
        dataset_path: Path to the CSV dataset.
        num_questions_per_subject: Number of rows to sample for each subject.
        fixed_questions_file: Output file name, joined onto ``EVAL_RESULTS_PATH``.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        ValueError: If any subject has fewer rows than ``num_questions_per_subject``.
    """
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset file not found at {dataset_path}")

    dataset = pd.read_csv(dataset_path)

    fixed_questions = {}
    for task in Tasks:
        subject = task.value.benchmark
        subject_questions = dataset[dataset['Subject'] == subject]
        available = len(subject_questions)

        if available < num_questions_per_subject:
            raise ValueError(f"Not enough questions for subject '{subject}'. Required: {num_questions_per_subject}, Available: {available}")

        # A fixed random_state keeps the selection reproducible across runs.
        selected_questions = subject_questions.sample(n=num_questions_per_subject, random_state=42)
        fixed_questions[subject] = selected_questions.to_dict(orient='records')

    output_path = os.path.join(EVAL_RESULTS_PATH, fixed_questions_file)

    # Create the destination directory up front so the write cannot fail on a
    # missing folder after all of the sampling work has already been done.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(fixed_questions, f, indent=4)

    # Report the path that was actually written, not just the bare file name.
    print(f"Fixed questions preselected and saved to {output_path}")
93
+
94
if __name__ == "__main__":
    # Example invocation: sample 30 questions per subject from a CSV dataset
    # stored under EVAL_RESULTS_PATH and write them to a JSON file.

    # Name of the JSON file that receives the preselected questions.
    FIXED_QUESTIONS_FILE = "fixed_questions.json"

    # Location of the source dataset; update with your actual dataset file.
    DATASET_PATH = os.path.join(EVAL_RESULTS_PATH, "your_dataset.csv")

    preselect_fixed_questions(
        dataset_path=DATASET_PATH,
        num_questions_per_subject=30,
        fixed_questions_file=FIXED_QUESTIONS_FILE,
    )