rohansampath committed on
Commit c44c829 · verified · 1 Parent(s): 5ea0bec

Create dataset_previews.py

Files changed (1)
  1. dataset_previews.py +120 -0
dataset_previews.py ADDED
@@ -0,0 +1,120 @@
import os
import json
import pandas as pd
from typing import Dict, Any

def calculate_evaluation_time(num_questions: int, input_tokens: int, generated_tokens_per_question: int = 300) -> float:
    """Calculate approximate evaluation time based on token counts and throughput assumptions."""
    # Constants
    PROMPT_THROUGHPUT = 5000  # tokens per second
    GENERATION_THROUGHPUT = 500  # tokens per second
    OVERHEAD_MINUTES = 2  # Fixed overhead for model loading, etc.

    # Calculate total generated tokens
    total_generated_tokens = num_questions * generated_tokens_per_question

    # Calculate time components (in seconds)
    prompt_time = input_tokens / PROMPT_THROUGHPUT
    generation_time = total_generated_tokens / GENERATION_THROUGHPUT

    # Total time in minutes
    total_time_minutes = (prompt_time + generation_time) / 60 + OVERHEAD_MINUTES

    return total_time_minutes

def mmlupro_dataset_preview() -> pd.DataFrame:
    """
    Generate or retrieve the MMLU-Pro dataset preview information.

    Returns:
        pd.DataFrame: Preview information formatted for display.
    """
    preview_file = "mmlu_pro_dataset_preview_table.json"

    # Check whether a cached preview file exists
    if os.path.exists(preview_file):
        try:
            # Read the existing preview file
            with open(preview_file, 'r') as f:
                preview_data = json.load(f)
            return format_preview_for_display(preview_data)
        except Exception as e:
            print(f"Error reading preview file: {e}")
            # If the file exists but can't be read, fall through and regenerate it

    # Generate preview data if the file doesn't exist or couldn't be read
    num_questions = 12032
    input_tokens = 12642105
    generated_tokens_per_question = 300

    # Calculate evaluation time
    eval_time_minutes = calculate_evaluation_time(
        num_questions,
        input_tokens,
        generated_tokens_per_question
    )

    # Create preview data
    preview_data = {
        "dataset_name": "MMLU-Pro",
        "evaluation_type": "Multiple Choice",
        "description": "MMLU-Pro is a refined version of the MMLU dataset, which has been a standard for multiple-choice knowledge assessment. Recent research identified issues with the original MMLU, such as noisy data (some unanswerable questions) and decreasing difficulty due to advances in model capabilities and increased data contamination. MMLU-Pro addresses these issues by presenting models with 10 choices instead of 4, requiring reasoning on more questions, and undergoing expert review to reduce noise. As a result, MMLU-Pro is of higher quality and currently more challenging than the original. A higher score is better.",
        "links": {
            "huggingface": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro",
            "github": "https://github.com/TIGER-AI-Lab/MMLU-Pro",
            "paper": "https://arxiv.org/abs/2406.01574"
        },
        "organization": "Questions are organized into 14 subjects. Each subject has 5 validation questions (70 in total), which serve as 5-shot prompts for each evaluation question.",
        "num_questions": num_questions,
        "input_tokens": input_tokens,
        "evaluation_time": {
            "generated_tokens_per_question": generated_tokens_per_question,
            "total_generated_tokens": num_questions * generated_tokens_per_question,
            "prompt_throughput": 5000,
            "generation_throughput": 500,
            "total_time_minutes": round(eval_time_minutes, 2)
        }
    }

    # Save preview data to file so subsequent calls can reuse it
    try:
        with open(preview_file, 'w') as f:
            json.dump(preview_data, f, indent=2)
    except Exception as e:
        print(f"Error writing preview file: {e}")

    return format_preview_for_display(preview_data)

def format_preview_for_display(preview_data: Dict[str, Any]) -> pd.DataFrame:
    """
    Format the preview data into a DataFrame for display in Gradio.

    Args:
        preview_data (Dict[str, Any]): Dataset preview information.

    Returns:
        pd.DataFrame: Formatted data for display.
    """
    # Create a table format with keys and values
    rows = [
        {"Key": "Dataset Name", "Value": preview_data["dataset_name"]},
        {"Key": "Evaluation Type", "Value": preview_data["evaluation_type"]},
        {"Key": "Description", "Value": preview_data["description"]},
        {"Key": "Links", "Value": (
            f"Hugging Face: {preview_data['links']['huggingface']}\n"
            f"GitHub: {preview_data['links']['github']}\n"
            f"Paper: {preview_data['links']['paper']}"
        )},
        {"Key": "Organization", "Value": preview_data["organization"]},
        {"Key": "Number of Questions", "Value": preview_data["num_questions"]},
        {"Key": "Number of Input Tokens", "Value": preview_data["input_tokens"]},
        {"Key": "Estimated Evaluation Time", "Value": f"{preview_data['evaluation_time']['total_time_minutes']} minutes (for 2 models on an A100)"}
    ]

    return pd.DataFrame(rows)

# Example usage (for testing)
if __name__ == "__main__":
    # mmlupro_dataset_preview() already returns a display-ready DataFrame,
    # so formatting it a second time would fail on the Key/Value columns
    df = mmlupro_dataset_preview()
    print(df)
+ print(df)