arunasrivastava committed
Commit a2c34b1 · Parent: b957022

it worked! mostly

Files changed (2):
  1. app.py +161 -114
  2. utils_display.py +1 -1
app.py CHANGED
@@ -1,140 +1,187 @@
 
 import gradio as gr
 import pandas as pd
 import json
-from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS
-from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
-from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
 from datetime import datetime, timezone

-LAST_UPDATED = "Nov 22th 2024"

 column_names = {
     "MODEL": "Model",
-    "Avg. PER": "Average PER ⬇️",
-    "Avg. PWED": "Avg. PWED ⬆️️",
 }

-eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()

-if not csv_results.exists():
-    raise Exception(f"CSV file {csv_results} does not exist locally")

-# Get csv with data and parse columns
-original_df = pd.read_csv(csv_results)
-
-# Formats the columns
-def formatter(x):
-    if type(x) is str:
-        x = x
-    else:
-        x = round(x, 2)
-    return x
-
-for col in original_df.columns:
-    if col == "model":
-        original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
-    else:
-        original_df[col] = original_df[col].apply(formatter)  # For numerical values
-
-original_df.rename(columns=column_names, inplace=True)
-original_df.sort_values(by='Average WER ⬇️', inplace=True)
-
-COLS = [c.name for c in fields(AutoEvalColumn)]
-TYPES = [c.type for c in fields(AutoEvalColumn)]
-
-
-def request_model(model_text, chbcoco2017):
-
-    # Determine the selected checkboxes
-    dataset_selection = []
-    if chbcoco2017:
-        dataset_selection.append("ESB Datasets tests only")
-
-    if len(dataset_selection) == 0:
-        return styled_error("You need to select at least one dataset")
-
-    base_model_on_hub, error_msg = is_model_on_hub(model_text)
-
-    if not base_model_on_hub:
-        return styled_error(f"Base model '{model_text}' {error_msg}")
-
-    # Construct the output dictionary
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    required_datasets = ', '.join(dataset_selection)
-    eval_entry = {
-        "date": current_time,
-        "model": model_text,
-        "datasets_selected": required_datasets
     }

-    # Prepare file path
-    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
-
-    fn_datasets = '@ '.join(dataset_selection)
-    filename = model_text.replace("/", "@") + "@@" + fn_datasets
-    if filename in requested_models:
-        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")
     try:
-        filename_ext = filename + ".txt"
-        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext
-
-        # Write the results to a text file
-        with open(out_filepath, "w") as f:
-            f.write(json.dumps(eval_entry))
-
-        upload_file(filename, out_filepath)

-        # Include file in the list of uploaded files
-        requested_models.append(filename)

-        # Remove the local file
-        out_filepath.unlink()
-
-        return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")
     except Exception as e:
-        return styled_error(f"Error submitting request!")
-
-with gr.Blocks(css=LEADERBOARD_CSS) as demo:
-    gr.HTML(BANNER, elem_id="banner")
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
-            leaderboard_table = gr.components.Dataframe(
-                value=original_df,
-                datatype=TYPES,
-                elem_id="leaderboard-table",
                 interactive=False,
-                visible=True,
-            )
-
-        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
-            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
-            with gr.Column():
-                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
             with gr.Column():
-                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
-            with gr.Column():
-                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
-                chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
-            with gr.Column():
-                mdw_submission_result = gr.Markdown()
-                btn_submitt = gr.Button(value="🚀 Request")
-                btn_submitt.click(request_model,
-                                  [model_name_textbox, chb_coco2017],
-                                  mdw_submission_result)
-
-    gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")
-
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            gr.Textbox(
-                value=CITATION_TEXT, lines=7,
-                label="Copy the BibTeX snippet to cite this source",
-                elem_id="citation-button",
-                show_copy_button=True,
             )

-demo.launch(ssr_mode=False)

+
 import gradio as gr
 import pandas as pd
 import json
+from pathlib import Path
 from datetime import datetime, timezone

+LAST_UPDATED = "Dec 4th 2024"
+QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
+APP_DIR = Path("./")

+# Modified column names for phonemic transcription metrics
 column_names = {
     "MODEL": "Model",
+    "SUBMISSION_NAME": "Submission Name",
+    "AVG_PER": "Average PER ⬇️",
+    "AVG_PFER": "Average PFER ⬇️",
+    "SUBSET": "Dataset Subset",
+    "GITHUB_URL": "GitHub",
+    "DATE": "Submission Date"
 }

+def load_leaderboard_data():
+    leaderboard_path = QUEUE_DIR / "leaderboard.json"
+    if not leaderboard_path.exists():
+        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
+        return pd.DataFrame()
+
+    try:
+        with open(leaderboard_path, 'r') as f:
+            data = json.load(f)
+        df = pd.DataFrame(data)
+        return df
+    except Exception as e:
+        print(f"Error loading leaderboard data: {e}")
+        return pd.DataFrame()

+def format_leaderboard_df(df):
+    if df.empty:
+        return df
+
+    # Rename columns to display names
+    display_df = df.rename(columns={
+        "model": "MODEL",
+        "submission_name": "SUBMISSION_NAME",
+        "average_per": "AVG_PER",
+        "average_pfer": "AVG_PFER",
+        "subset": "SUBSET",
+        "github_url": "GITHUB_URL",
+        "submission_date": "DATE"
+    })
+    # Sort by PER (ascending) while the values are still numeric,
+    # so the ordering is not string-based after formatting
+    display_df.sort_values(by="AVG_PER", inplace=True)
+
+    # Format numeric columns
+    display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
+    display_df["AVG_PFER"] = display_df["AVG_PFER"].apply(lambda x: f"{x:.4f}")
+
+    # Make GitHub URLs clickable
+    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
+        lambda x: f'<a href="{x}" target="_blank">Repository</a>' if x else "N/A"
+    )

+    return display_df

+def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
+    if not model_name or not submission_name:
+        return gr.Markdown("⚠️ Please provide both model name and submission name.")

+    request_data = {
+        "transcription_model": model_name,
+        "subset": subset,
+        "max_samples": max_samples,
+        "submission_name": submission_name,
+        "github_url": github_url or ""
     }

     try:
+        # Ensure queue directory exists
+        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

+        # Generate unique timestamp for request file
+        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
+        request_file = QUEUE_DIR / f"request_{timestamp}.json"
+
+        with open(request_file, 'w') as f:
+            json.dump(request_data, f, indent=2)
+
+        return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
     except Exception as e:
+        return gr.Markdown(f"Error submitting request: {str(e)}")

+def load_results_for_model(model_name):
+    results_path = QUEUE_DIR / "results.json"
+    try:
+        with open(results_path, 'r') as f:
+            results = json.load(f)
+
+        # Filter results for the specific model
+        model_results = [r for r in results if r["model"] == model_name]
+        if not model_results:
+            return None
+
+        # Get the most recent result
+        latest_result = max(model_results, key=lambda x: x["timestamp"])
+        return latest_result
+    except Exception as e:
+        print(f"Error loading results: {e}")
+        return None
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
+    gr.Markdown("""
+    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
+
+    **Metrics:**
+    - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground-truth phonemes (lower is better)
+    - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
+    """)
+
+    with gr.Tabs() as tabs:
+        with gr.TabItem("🏆 Leaderboard"):
+            leaderboard_df = load_leaderboard_data()
+            formatted_df = format_leaderboard_df(leaderboard_df)
+
+            leaderboard_table = gr.DataFrame(
+                value=formatted_df,
                 interactive=False,
+                headers=list(column_names.values())
+            )
+
+            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
+            refresh_btn.click(
+                lambda: format_leaderboard_df(load_leaderboard_data()),
+                outputs=leaderboard_table
+            )
+
+        with gr.TabItem("📝 Submit Model"):
             with gr.Column():
+                model_input = gr.Textbox(
+                    label="Model Name",
+                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
+                    info="Enter the Hugging Face model ID"
+                )
+                submission_name = gr.Textbox(
+                    label="Submission Name",
+                    placeholder="My Awesome Model v1.0",
+                    info="Give your submission a descriptive name"
+                )
+                github_url = gr.Textbox(
+                    label="GitHub Repository URL (optional)",
+                    placeholder="https://github.com/username/repo",
+                    info="Link to your model's code repository"
+                )
+
+                submit_btn = gr.Button("🚀 Submit for Evaluation")
+                result_text = gr.Markdown()
+
+                submit_btn.click(
+                    request_evaluation,
+                    inputs=[model_input, submission_name, github_url],
+                    outputs=result_text
+                )
+
+        with gr.TabItem("ℹ️ Detailed Results"):
+            model_selector = gr.Textbox(
+                label="Enter Model Name to View Details",
+                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
             )
+            view_btn = gr.Button("View Results")
+            results_json = gr.JSON(label="Detailed Results")
+
+            def show_model_results(model_name):
+                results = load_results_for_model(model_name)
+                return results or {"error": "No results found for this model"}
+
+            view_btn.click(
+                show_model_results,
+                inputs=[model_selector],
+                outputs=[results_json]
+            )
+
+    gr.Markdown(f"Last updated: {LAST_UPDATED}")

+demo.launch()
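
Note on the new submit flow: request_evaluation is purely file-based — it writes a request_*.json into QUEUE_DIR and returns, so a separate worker process is expected to pick the file up. A minimal consumer-side sketch, assuming only the request schema visible in this diff; the worker loop, the process-and-unlink behavior, and the relative queue path are illustrative, not part of this commit:

import json
from pathlib import Path

QUEUE_DIR = Path("IPA-Transcription-EN-queue/queue")  # illustrative; app.py hardcodes an absolute path

def process_pending_requests():
    # Each request_*.json holds the fields written by request_evaluation:
    # transcription_model, subset, max_samples, submission_name, github_url
    for request_file in sorted(QUEUE_DIR.glob("request_*.json")):
        with open(request_file) as f:
            request = json.load(f)
        print(f"Would evaluate {request['transcription_model']} "
              f"on subset '{request['subset']}' ({request['max_samples']} samples)")
        request_file.unlink()  # drop the request once handled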
utils_display.py CHANGED
@@ -13,7 +13,7 @@ def fields(raw_class):
 @dataclass(frozen=True)
 class AutoEvalColumn:  # Auto evals column
     model = ColumnContent("Model", "markdown")
-    avg_wer = ColumnContent("Average WER ⬇️", "number")
+    avg_per = ColumnContent("Average PER ⬇️", "number")
     avg_wped = ColumnContent("Average PWED ⬇️", "number")

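
For context on the rename: AutoEvalColumn is consumed through this module's fields() helper, so code that enumerates columns picks up the new "Average PER" display name automatically. This is the pattern the removed app.py code used; the comment values are those implied by the dataclass above:

from utils_display import AutoEvalColumn, fields

COLS = [c.name for c in fields(AutoEvalColumn)]   # ["Model", "Average PER ⬇️", "Average PWED ⬇️"]
TYPES = [c.type for c in fields(AutoEvalColumn)]  # ["markdown", "number", "number"]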