rohansampath committed on
Commit
f64ee28
·
verified ·
1 Parent(s): a955837

Update dataset_previews.py

Browse files
Files changed (1) hide show
  1. dataset_previews.py +16 -11
dataset_previews.py CHANGED
@@ -78,14 +78,7 @@ def mmlupro_dataset_preview(regenerate_preview=True) -> Dict[str, Any]:
78
  try:
79
  # Calculate dataset statistics
80
  stats = calculate_dataset_statistics()
81
-
82
- # Format subject counts as a string, in descending order
83
- sorted_subjects = sorted(stats["subject_counts"].items(), key=lambda x: x[1], reverse=True)
84
- subject_counts_str = f"Total: {stats['total_questions']}\n"
85
- for subject, count in sorted_subjects:
86
- subject_counts_str += f"{subject}: {count}\n"
87
- subject_counts_str = subject_counts_str.strip()
88
-
89
  # Format options distribution as a string
90
  options_dist_str = f"Maximum: {stats['max_options']}\nAverage: {stats['avg_options']:.2f}\n"
91
  sorted_options = sorted(stats["options_distribution"].items(), key=lambda x: x[0], reverse=True)
@@ -104,7 +97,8 @@ def mmlupro_dataset_preview(regenerate_preview=True) -> Dict[str, Any]:
104
  "paper": "https://arxiv.org/abs/2406.01574"
105
  },
106
  "organization": "Questions are organized into 14 subjects. Each subject has 5 validation questions (for a total of 70). The 5 validation questions serve as 5-shot prompts for each evaluation question.",
107
- "num_questions": subject_counts_str,
 
108
  "choices_per_question": options_dist_str
109
  }
110
 
@@ -131,12 +125,23 @@ def mmlupro_dataset_preview(regenerate_preview=True) -> Dict[str, Any]:
131
  "paper": "https://arxiv.org/abs/2406.01574"
132
  },
133
  "organization": "Questions are organized into 14 subjects. Each subject has 5 validation questions (for a total of 70). The 5 validation questions serve as 5-shot prompts for each evaluation question.",
134
- "num_questions": f"Total: {num_questions} (Note: Using fallback value)",
 
135
  "choices_per_question": "Maximum: 10\nAverage: 10.0\n10-choices: 12032"
136
  }
137
 
138
  return preview_data
139
 
 
 
 
 
 
 
 
 
 
 
140
  def format_preview_for_display(preview_data: Dict[str, Any]) -> pd.DataFrame:
141
  """
142
  Format the preview data with improved readability for display in Gradio
@@ -162,7 +167,7 @@ def format_preview_for_display(preview_data: Dict[str, Any]) -> pd.DataFrame:
162
  {"Dataset Property": "Description", "Details": preview_data["description"]},
163
  {"Dataset Property": "Links", "Details": links_formatted},
164
  {"Dataset Property": "Organization", "Details": preview_data["organization"]},
165
- {"Dataset Property": "Number of Questions", "Details": preview_data["num_questions"]},
166
  {"Dataset Property": "Choices per Question", "Details": preview_data["choices_per_question"]}
167
  ]
168
 
 
78
  try:
79
  # Calculate dataset statistics
80
  stats = calculate_dataset_statistics()
81
+
 
 
 
 
 
 
 
82
  # Format options distribution as a string
83
  options_dist_str = f"Maximum: {stats['max_options']}\nAverage: {stats['avg_options']:.2f}\n"
84
  sorted_options = sorted(stats["options_distribution"].items(), key=lambda x: x[0], reverse=True)
 
97
  "paper": "https://arxiv.org/abs/2406.01574"
98
  },
99
  "organization": "Questions are organized into 14 subjects. Each subject has 5 validation questions (for a total of 70). The 5 validation questions serve as 5-shot prompts for each evaluation question.",
100
+ "total_questions": stats["total_questions"],
101
+ "subject_counts": stats["subject_counts"],
102
  "choices_per_question": options_dist_str
103
  }
104
 
 
125
  "paper": "https://arxiv.org/abs/2406.01574"
126
  },
127
  "organization": "Questions are organized into 14 subjects. Each subject has 5 validation questions (for a total of 70). The 5 validation questions serve as 5-shot prompts for each evaluation question.",
128
+ "total_questions": 12032
129
+ "subject_counts": f"Total: 12032 (Note: Using fallback value)",
130
  "choices_per_question": "Maximum: 10\nAverage: 10.0\n10-choices: 12032"
131
  }
132
 
133
  return preview_data
134
 
135
def subject_counts_formatting(subject_counts, total_questions) -> str:
    """Render the question total and per-subject counts as display text.

    The first line is ``Total: <total_questions>``; each following line is
    ``<subject>: <count>``, ordered by count from largest to smallest.

    Args:
        subject_counts: Mapping of subject name to its question count. A
            plain string is also accepted and returned unchanged, because
            the fallback preview stores an already-formatted message under
            the ``"subject_counts"`` key instead of a mapping.
        total_questions: Overall number of questions, shown on the first line.

    Returns:
        The newline-separated summary with surrounding whitespace stripped.
    """
    # Fallback previews carry a ready-made string; calling .items() on it
    # would raise AttributeError, so pass it through untouched.
    if isinstance(subject_counts, str):
        return subject_counts

    # Sort by count, descending (ties keep their original mapping order).
    sorted_subjects = sorted(
        subject_counts.items(), key=lambda item: item[1], reverse=True
    )
    lines = [f"Total: {total_questions}"]
    lines.extend(f"{subject}: {count}" for subject, count in sorted_subjects)

    # The original built this string but never returned it, so callers got None.
    return "\n".join(lines).strip()
142
+
143
+
144
+
145
  def format_preview_for_display(preview_data: Dict[str, Any]) -> pd.DataFrame:
146
  """
147
  Format the preview data with improved readability for display in Gradio
 
167
  {"Dataset Property": "Description", "Details": preview_data["description"]},
168
  {"Dataset Property": "Links", "Details": links_formatted},
169
  {"Dataset Property": "Organization", "Details": preview_data["organization"]},
170
+ {"Dataset Property": "Number of Questions", "Details": subject_counts_formatting(preview_data["subject_counts"],preview_data["total_questions"], },
171
  {"Dataset Property": "Choices per Question", "Details": preview_data["choices_per_question"]}
172
  ]
173