arunasrivastava committed on
Commit b2730cf · 1 Parent(s): a2c34b1

json not working

Files changed (1)
  1. app.py +159 -65
app.py CHANGED
@@ -1,9 +1,9 @@
-
 import gradio as gr
 import pandas as pd
 import json
 from pathlib import Path
 from datetime import datetime, timezone
+import uuid

 LAST_UPDATED = "Dec 4th 2024"
 QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
@@ -12,47 +12,55 @@ APP_DIR = Path("./")
 # Modified column names for phonemic transcription metrics
 column_names = {
     "MODEL": "Model",
-    "SUBMISSION_NAME": "Submission Name",
     "AVG_PER": "Average PER ⬇️",
-    "AVG_PFER": "Average PFER ⬇️",
-    "SUBSET": "Dataset Subset",
+    "AVG_PWED": "Average PWED ⬇️",
     "GITHUB_URL": "GitHub",
     "DATE": "Submission Date"
 }

+def load_json_file(file_path: Path, default=None):
+    """Safely load a JSON file or return default if file doesn't exist"""
+    if default is None:
+        default = []
+
+    if not file_path.exists():
+        return default
+
+    try:
+        with open(file_path, 'r') as f:
+            return json.load(f)
+    except json.JSONDecodeError:
+        return default
+
+def save_json_file(file_path: Path, data):
+    """Safely save data to a JSON file"""
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(file_path, 'w') as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+
 def load_leaderboard_data():
+    """Load and parse leaderboard data"""
     leaderboard_path = QUEUE_DIR / "leaderboard.json"
-    if not leaderboard_path.exists():
-        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
-        return pd.DataFrame()
-
-    try:
-        with open(leaderboard_path, 'r') as f:
-            data = json.load(f)
-            df = pd.DataFrame(data)
-            return df
-    except Exception as e:
-        print(f"Error loading leaderboard data: {e}")
-        return pd.DataFrame()
+    data = load_json_file(leaderboard_path)
+    return pd.DataFrame(data) if data else pd.DataFrame()

 def format_leaderboard_df(df):
+    """Format leaderboard dataframe for display"""
     if df.empty:
         return df

-    # Rename columns to display names
-    display_df = df.rename(columns={
-        "model": "MODEL",
-        "submission_name": "SUBMISSION_NAME",
-        "average_per": "AVG_PER",
-        "average_pfer": "AVG_PFER",
-        "subset": "SUBSET",
-        "github_url": "GITHUB_URL",
-        "submission_date": "DATE"
+    # Select and rename only the columns we want to display
+    display_df = pd.DataFrame({
+        "MODEL": df["model"],
+        "AVG_PER": df["average_per"],
+        "AVG_PWED": df["average_pwed"],
+        "GITHUB_URL": df["github_url"],
+        "DATE": pd.to_datetime(df["submission_date"]).dt.strftime("%Y-%m-%d")
     })

     # Format numeric columns
     display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
-    display_df["AVG_PFER"] = display_df["AVG_PFER"].apply(lambda x: f"{x:.4f}")
+    display_df["AVG_PWED"] = display_df["AVG_PWED"].apply(lambda x: f"{x:.4f}")

     # Make GitHub URLs clickable
     display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
@@ -64,61 +72,130 @@ def format_leaderboard_df(df):

     return display_df

-def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
+def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=None):
+    """Submit new evaluation request"""
     if not model_name or not submission_name:
         return gr.Markdown("⚠️ Please provide both model name and submission name.")
-
-    request_data = {
-        "transcription_model": model_name,
-        "subset": subset,
-        "max_samples": max_samples,
-        "submission_name": submission_name,
-        "github_url": github_url or ""
-    }

     try:
         # Ensure queue directory exists
         QUEUE_DIR.mkdir(parents=True, exist_ok=True)

-        # Generate unique timestamp for request file
-        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
-        request_file = QUEUE_DIR / f"request_{timestamp}.json"
+        # Load existing tasks
+        tasks_file = QUEUE_DIR / "tasks.json"
+        tasks = load_json_file(tasks_file)
+
+        # Create new task
+        new_task = {
+            "id": str(uuid.uuid4()),
+            "transcription_model": model_name,
+            "subset": subset,
+            "max_samples": max_samples,
+            "submission_name": submission_name,
+            "github_url": github_url or "",
+            "status": "queued",
+            "submitted_at": datetime.now(timezone.utc).isoformat()
+        }
+
+        # Add new task to existing tasks
+        tasks.append(new_task)
+
+        # Save updated tasks
+        save_json_file(tasks_file, tasks)

-        with open(request_file, 'w') as f:
-            json.dump(request_data, f, indent=2)
-
         return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")

     except Exception as e:
         return gr.Markdown(f"❌ Error submitting request: {str(e)}")

 def load_results_for_model(model_name):
+    """Load detailed results for a specific model"""
     results_path = QUEUE_DIR / "results.json"
-    try:
-        with open(results_path, 'r') as f:
-            results = json.load(f)
-
-        # Filter results for the specific model
-        model_results = [r for r in results if r["model"] == model_name]
-        if not model_results:
-            return None
-
-        # Get the most recent result
-        latest_result = max(model_results, key=lambda x: x["timestamp"])
-        return latest_result
-    except Exception as e:
-        print(f"Error loading results: {e}")
+    results = load_json_file(results_path)
+
+    # Filter results for the specific model
+    model_results = [r for r in results if r["model"] == model_name]
+    if not model_results:
         return None
+
+    # Get the most recent result
+    latest_result = max(model_results, key=lambda x: x["timestamp"])
+    return latest_result
+
+def create_html_table(df):
+    """Create HTML table with dark theme styling"""
+    if df.empty:
+        return "<p>No data available</p>"
+
+    html = """
+    <style>
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            color: white;
+            background-color: #1a1a1a;
+        }
+        th, td {
+            padding: 8px;
+            text-align: left;
+            border: 1px solid #333;
+        }
+        th {
+            background-color: #2a2a2a;
+            color: white;
+        }
+        tr:nth-child(even) {
+            background-color: #252525;
+        }
+        tr:hover {
+            background-color: #303030;
+        }
+        a {
+            color: #6ea8fe;
+            text-decoration: none;
+        }
+        a:hover {
+            text-decoration: underline;
+        }
+    </style>
+    <table>
+        <thead>
+            <tr>
+    """
+
+    # Add headers
+    for header in column_names.values():
+        html += f"<th>{header}</th>"
+
+    html += "</tr></thead><tbody>"
+
+    # Add rows
+    for _, row in df.iterrows():
+        html += "<tr>"
+        for col in df.columns:
+            if col == "GITHUB_URL":
+                html += f"<td>{row[col]}</td>"  # URL is already formatted as HTML
+            else:
+                html += f"<td>{row[col]}</td>"
+        html += "</tr>"
+
+    html += "</tbody></table>"
+    return html

 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
     gr.Markdown("""
-    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
+    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks for English.

     **Metrics:**
     - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
-    - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
+    - **PWED (Phoneme Weighted Edit Distance)**: Measures a weighted difference in phonemes using phonemic features (lower is better)
+
+    **Datasets:**
+    - **[TIMIT](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech)**: A phonemic transcription dataset for English speech recognition
+
+    To learn more about the evaluation metrics, check out our blog post [here](https://huggingface.co/spaces/evaluate-metric/wer).
     """)

     with gr.Tabs() as tabs:
@@ -126,15 +203,13 @@ with gr.Blocks() as demo:
             leaderboard_df = load_leaderboard_data()
             formatted_df = format_leaderboard_df(leaderboard_df)

-            leaderboard_table = gr.DataFrame(
-                value=formatted_df,
-                interactive=False,
-                headers=list(column_names.values())
+            leaderboard_table = gr.HTML(
+                value=create_html_table(formatted_df)
             )

             refresh_btn = gr.Button("🔄 Refresh Leaderboard")
             refresh_btn.click(
-                lambda: gr.DataFrame(value=format_leaderboard_df(load_leaderboard_data()))
+                lambda: gr.HTML(value=create_html_table(format_leaderboard_df(load_leaderboard_data())))
             )

         with gr.TabItem("📝 Submit Model"):
@@ -158,10 +233,28 @@ with gr.Blocks() as demo:
             submit_btn = gr.Button("🚀 Submit for Evaluation")
             result_text = gr.Markdown()

+            def submit_and_clear(model_name, submission_name, github_url):
+                result = request_evaluation(model_name, submission_name, github_url)
+                # If submission was successful, clear the form
+                if "✅" in result.value:
+                    return {
+                        model_input: "",
+                        submission_name: "",
+                        github_url: "",
+                        result_text: result
+                    }
+                # If there was an error, keep the form data and show error
+                return {
+                    model_input: model_name,
+                    submission_name: submission_name,
+                    github_url: github_url,
+                    result_text: result
+                }
+
             submit_btn.click(
-                request_evaluation,
+                submit_and_clear,
                 inputs=[model_input, submission_name, github_url],
-                outputs=result_text
+                outputs=[model_input, submission_name, github_url, result_text]
             )

         with gr.TabItem("ℹ️ Detailed Results"):
@@ -184,4 +277,5 @@ with gr.Blocks() as demo:

     gr.Markdown(f"Last updated: {LAST_UPDATED}")

-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
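
For reference, below is a minimal sketch of the tasks.json round trip that this commit introduces. The helper functions and the task fields come straight from the diff above; everything else is illustrative: the scratch directory, the example model id, the submission name, the truncated UUID, and the timestamp are assumptions made up for the demo, not values from the commit.

# Minimal sketch of the tasks.json round trip (illustrative values, not from the commit).
# Assumes app.py is importable; after this commit demo.launch() is guarded by
# `if __name__ == "__main__"`, so importing the module does not start the server.
from pathlib import Path
from app import load_json_file, save_json_file

demo_queue = Path("/tmp/ipa-queue-demo")        # hypothetical scratch directory
tasks_file = demo_queue / "tasks.json"

tasks = load_json_file(tasks_file)              # -> [] on first run, since the file does not exist yet
tasks.append({
    "id": "3b9f2c1e-0000-0000-0000-000000000000",          # a uuid4 string in the real app
    "transcription_model": "username/example-ipa-model",   # hypothetical model id
    "subset": "test",
    "max_samples": None,
    "submission_name": "example submission",
    "github_url": "",
    "status": "queued",
    "submitted_at": "2024-12-04T00:00:00+00:00",
})
save_json_file(tasks_file, tasks)               # creates parent directories, writes indented JSON
print(load_json_file(tasks_file))               # round-trips the queued task

Presumably a separate worker process drains entries whose status is "queued" and writes results.json and leaderboard.json back into QUEUE_DIR; the app itself only ever reads those two files via load_results_for_model and load_leaderboard_data.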