|
import json |
|
import os |
|
import re |
|
from datetime import datetime |
|
from typing import Tuple |
|
|
|
import pandas as pd |
|
from bs4 import BeautifulSoup |
|
|
|
|
|
def format_datetime(dt_str: str) -> str:
    """
    Render an ISO-8601 datetime string in a human-friendly form.

    Swaps the "T" separator for a space and drops any "+HH:MM" UTC-offset
    suffix; strings without an offset pass through with only the separator
    replaced.

    :param dt_str: String representing a datetime in ISO format
    :return: Display-ready datetime string
    """
    readable = dt_str.replace("T", " ")
    return readable.partition("+")[0]
|
|
|
|
|
def read_json_line_by_line(file_path, commit_hash=None):
    """
    Read a JSON-lines file, parsing each non-empty line as a JSON object.

    Optionally filter by commit_hash if provided.

    :param file_path: Path to the JSON-lines file
    :param commit_hash: Optional commit hash; when given, only objects whose
        "commit_hash" field equals it are kept
    :return: List of parsed JSON objects
    """
    data = []
    with open(file_path, "r") as f:
        for line in f:
            line = line.strip()
            # Blank lines are common in hand-edited JSONL files; skip them
            # silently instead of warning about "invalid JSON".
            if not line:
                continue
            try:
                item = json.loads(line)

                if commit_hash is None or item.get("commit_hash") == commit_hash:
                    data.append(item)
            except json.JSONDecodeError:
                print(f"Skipping invalid JSON in {file_path}: {line}")
    return data
|
|
|
|
|
def calculate_change(new: float, old: float, metric_name: str) -> Tuple[float, str]:
    """
    Compute the raw delta between two metric values plus a status emoji.

    Note: despite the local variable name, this returns an absolute
    difference (``new - old``), not a percentage.  For WER-style metrics a
    decrease is an improvement, so the emoji polarity is inverted whenever
    "wer" appears in the metric name.

    :param new: Current metric value
    :param old: Previous metric value
    :param metric_name: Metric label, used to detect WER-style metrics
    :return: Tuple of (delta, emoji)
    """
    delta = new - old
    # Deltas smaller than 1 are treated as "steady" noise.
    if abs(delta) < 1:
        emoji = "βοΈ"
    elif delta > 0:
        # Higher is better for everything except WER.
        emoji = "π’" if "wer" not in metric_name.lower() else "β"
    else:
        emoji = "β" if "wer" not in metric_name.lower() else "π’"

    return (delta, emoji)
|
|
|
|
|
def has_changes(config, prev_dict, curr_dict):
    """Return True when any tracked metric moved by at least 1 between runs."""
    current = curr_dict[config]
    previous = prev_dict[config]

    tracked = ("speed", "tokens_per_second", "average_wer", "qoi")
    # A metric only counts when it is present in both runs.
    return any(
        abs(current[name] - previous[name]) >= 1
        for name in tracked
        if name in current and name in previous
    )
|
|
|
|
|
def format_metrics_table(config, prev_dict, curr_dict, improved, regressed):
    """
    Render a fixed-width comparison table for one configuration and tally
    wins/losses into the supplied counter dicts.

    Rows are emitted only for metrics present in both runs whose delta is at
    least 1.  Each emitted row increments the matching counter in `improved`
    or `regressed` (mutated in place); WER polarity is inverted because a
    lower WER is an improvement.
    """
    current = curr_dict[config]
    previous = prev_dict[config]

    metric_labels = (
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    )

    rows = ["```\nMetric Previous Current Change\n--------------------------------\n"]
    for label, field in metric_labels:
        if field not in current or field not in previous:
            continue
        new_val = current[field]
        old_val = previous[field]
        delta, _ = calculate_change(new_val, old_val, label)
        if abs(delta) < 1:
            continue
        rows.append(f"{label:<9} {old_val:<11.2f} {new_val:<10.2f} {delta:.2f}\n")

        # A positive delta is a win for everything except WER, where lower is better.
        is_win = (delta > 0) == ("wer" not in label.lower())
        (improved if is_win else regressed)[field] += 1
    rows.append("```")
    return "".join(rows)
|
|
|
|
|
def extract_status_and_os(cell_value):
    """
    Extract status and OS versions from a cell, handling both HTML and plain text.
    Returns list of tuples: [(status, os_version), ...]
    """
    results = []
    # Cells may arrive as NaN/None from pandas; normalize to str first.
    cell_value = str(cell_value)

    # "Not Supported" cells contribute no (status, os_version) pairs.
    if cell_value == "Not Supported":
        return results

    # A cell can hold several "<p>...</p>" fragments, one per OS version.
    parts = cell_value.split("<p>")

    for part in parts:
        # NOTE(review): str.strip("</p>") strips any of the characters
        # '<', '/', 'p', '>' from both ends, not the literal "</p>" tag;
        # a fragment beginning or ending with those characters would be
        # over-trimmed — confirm cell text never does.
        part = part.strip("</p>")
        if not part:
            continue

        # Warning fragments carry HTML markup; strip it before matching.
        if "β οΈ" in part:

            soup = BeautifulSoup(part, "html.parser")

            text = soup.get_text()
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", text)
            if os_match:
                os_version = os_match.group(0)
                results.append(("β οΈ", os_version))
        else:
            # Plain-text fragments are recorded as passing configurations.
            os_match = re.search(r"(iOS|iPadOS|macOS)\s+[\d.]+", part)
            if os_match:
                os_version = os_match.group(0)
                results.append(("β", os_version))

    return results
|
|
|
|
|
def escape_string(s: str) -> str:
    """Escape backslashes, quotes, and line breaks so *s* can be embedded in JSON."""
    # Backslash must be escaped first so the later escapes are not doubled.
    replacements = (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\r", "\\r"),
    )
    for raw, escaped in replacements:
        s = s.replace(raw, escaped)
    return s
|
|
|
|
|
def analyze_support_changes(prev_csv, curr_csv):
    """Analyze support changes between CSV files.

    Compares the device-support matrices of two runs.

    :param prev_csv: Path to the previous support matrix CSV
    :param curr_csv: Path to the current support matrix CSV
    :return: Tuple (fixed_errors, new_errors, new_configs, needs_alert);
        the first three are lists of (model, device, os_version) tuples,
        and needs_alert flags a >10% drop in tested device count.
    """
    # The first CSV column is used as the row index (model names).
    prev_df = pd.read_csv(prev_csv)
    prev_df.set_index(prev_df.columns[0], inplace=True)

    curr_df = pd.read_csv(curr_csv)
    curr_df.set_index(curr_df.columns[0], inplace=True)

    # NOTE(review): after set_index the former first column is no longer in
    # .columns, so [1:] here (and col_idx starting at 1 below) skips what is
    # now the first *device* column — confirm this offset is intentional.
    prev_devices = sorted(prev_df.columns[1:])
    curr_devices = sorted(curr_df.columns[1:])

    # Alert when the current run covers fewer than 90% of previous devices.
    device_ratio = len(curr_devices) / len(prev_devices) if prev_devices else 1
    needs_alert = device_ratio < 0.9

    # Maps of (model, device, os_version) -> status marker.
    prev_status = {}
    curr_status = {}

    # Collect statuses from the previous matrix.
    for idx in range(len(prev_df)):
        model = prev_df.index[idx]
        for col_idx in range(1, len(prev_df.columns)):
            cell_value = prev_df.iloc[idx, col_idx]
            device = prev_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                prev_status[(model, device, os_version)] = status

    # Collect current statuses and note configurations not seen before.
    new_configs = []
    for idx in range(len(curr_df)):
        model = curr_df.index[idx]
        for col_idx in range(1, len(curr_df.columns)):
            cell_value = curr_df.iloc[idx, col_idx]
            device = curr_df.columns[col_idx]
            statuses = extract_status_and_os(cell_value)
            for status, os_version in statuses:
                curr_status[(model, device, os_version)] = status

                if (model, device, os_version) not in prev_status:
                    new_configs.append((model, device, os_version))

    # Status flips between the two runs.
    fixed_errors = []  # warning in prev, passing in curr
    new_errors = []  # passing in prev, warning in curr

    common_configs = set(prev_status.keys()) & set(curr_status.keys())
    for config in common_configs:
        model, device, os_version = config
        if prev_status[config] == "β οΈ" and curr_status[config] == "β":
            fixed_errors.append((model, device, os_version))
        elif prev_status[config] == "β" and curr_status[config] == "β οΈ":
            new_errors.append((model, device, os_version))

    return fixed_errors, new_errors, new_configs, needs_alert
|
|
|
|
|
def generate_report():
    """
    Build the WhisperKit dataset-update Slack report and publish it via GITHUB_OUTPUT.

    Compares the current run's data (report_data/) against the previously
    published data (dashboard_data/): per-(model, device, os) performance
    metrics, release lists, and the device-support matrix.  The resulting
    Slack Block Kit payload and an escaped performance summary are appended
    to the file named by the GITHUB_OUTPUT environment variable, when set.
    """
    with open("report_data/version.json", "r") as f:
        version_data = json.load(f)

    # The last two entries of "releases" are the current and previous commits.
    releases = version_data.get("releases", [])
    if len(releases) >= 2:
        curr_commit_hash = releases[-1]
        prev_commit_hash = releases[-2]
    else:
        curr_commit_hash = releases[-1] if releases else ""
        prev_commit_hash = ""

    # dashboard_data/ holds the previously published run; report_data/ the current one.
    prev_perf_data = read_json_line_by_line("dashboard_data/performance_data.json", commit_hash=prev_commit_hash)
    curr_perf_data = read_json_line_by_line("report_data/performance_data.json", commit_hash=curr_commit_hash)

    # Key performance entries by (model, device, os) for diffing.
    prev_dict = {(d["model"], d["device"], d["os"]): d for d in prev_perf_data}
    curr_dict = {(d["model"], d["device"], d["os"]): d for d in curr_perf_data}
    common_configs = set(curr_dict.keys()) & set(prev_dict.keys())

    with open("dashboard_data/version.json", "r") as f:
        prev_version = json.load(f)
    with open("report_data/version.json", "r") as f:
        curr_version = json.load(f)

    prev_releases = set(prev_version.get("releases", []))
    curr_releases = set(curr_version.get("releases", []))
    new_releases = curr_releases - prev_releases
    removed_releases = prev_releases - curr_releases

    # Per-metric counters, mutated in place by format_metrics_table below.
    improved_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}
    regressed_metrics = {"speed": 0, "tokens_per_second": 0, "average_wer": 0, "qoi": 0}

    new_data_points = len(set(curr_dict.keys()) - set(prev_dict.keys()))

    # NOTE(review): argument order looks inverted here — report_data/ is
    # "current" everywhere else in this function but is passed as prev_csv
    # (analyze_support_changes signature is (prev_csv, curr_csv)).  Confirm.
    fixed_errors, new_errors, new_configs, needs_alert = analyze_support_changes(
        "report_data/support_data.csv", "dashboard_data/support_data.csv"
    )

    # %-d (no zero padding) is a glibc/BSD strftime extension; not portable to Windows.
    current_time = datetime.now().strftime("%B %-d, %Y %H:%M:%S")
    prev_release_tag, curr_release_tag = (
        prev_version["versions"][-1] if prev_version["versions"] else "N/A",
        curr_version["versions"][-1],
    )

    # Static header and current-version info blocks.
    slack_blocks = {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "π WhisperKit Dataset Update Report π",
                    "emoji": True,
                },
            },
            {
                "type": "context",
                "elements": [{"text": f"*{current_time}*", "type": "mrkdwn"}],
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {"type": "mrkdwn", "text": "βΉοΈ *CURRENT VERSION INFO* βΉοΈ"},
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Last Modified:* `{format_datetime(curr_version['last_modified'])}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Dataset SHA:* `{curr_version['sha']}`",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Current Releases:* {', '.join(f'`{r}`' for r in curr_version['releases'])}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Current Release Tag:* `{curr_release_tag}`",
                },
            },
            {"type": "divider"},
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "π *SUMMARY OF PERFORMANCE UPDATES* π",
                },
            },
        ]
    }

    # Release additions/removals since the last published dashboard.
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Added Releases:* {', '.join(sorted(new_releases)) if new_releases else 'None'}",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Removed Releases:* {', '.join(sorted(removed_releases)) if removed_releases else 'None'}",
                },
            },
        ]
    )
    if prev_release_tag != curr_release_tag:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *Release Tag Change:* `{prev_release_tag}` β `{curr_release_tag}`",
                },
            }
        )
    slack_blocks["blocks"].extend(
        [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *New Data Points:* `{new_data_points}` new configurations",
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "\n",
                },
            },
        ]
    )

    # Fix: always define performance_text so the GITHUB_OUTPUT section below
    # cannot raise NameError when there are no common configurations.
    performance_text = ""
    if common_configs:
        performance_text = "π‘ *Performance Updates* π‘\n\n"

        # Walk configurations grouped by model, in sorted order.
        models = sorted(set(model for model, _, _ in common_configs))

        for model in models:
            model_configs = sorted([cfg for cfg in common_configs if cfg[0] == model])

            for config in model_configs:
                device_info = f"*{model}* ({config[2]})"

                if not has_changes(config, prev_dict, curr_dict):
                    # No metric moved by >= 1; report the config as steady.
                    performance_text += f"{device_info} β\n\n"
                else:
                    # Render the per-metric delta table (also updates counters).
                    performance_text += f"{device_info}\n"
                    table = format_metrics_table(config, prev_dict, curr_dict, improved_metrics, regressed_metrics)
                    performance_text += table
                    performance_text += "\n\n"

    # Per-metric improved/regressed tallies.
    for metric_name, key in [
        ("Speed", "speed"),
        ("Tok/s", "tokens_per_second"),
        ("WER", "average_wer"),
        ("QoI", "qoi"),
    ]:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"β’ *{metric_name}:* `{improved_metrics[key]}` improved, `{regressed_metrics[key]}` regressed",
                },
            }
        )

    # Device-support matrix changes, if any.
    if fixed_errors or new_errors or new_configs:
        slack_blocks["blocks"].extend(
            [
                {"type": "divider"},
                {
                    "type": "section",
                    "text": {"type": "mrkdwn", "text": "π± *DEVICE SUPPORT CHANGES* π±"},
                },
            ]
        )

    if fixed_errors:
        slack_blocks["blocks"].extend(
            [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Successful Configurations That Override Previous Failures*",
                    },
                }
            ]
        )
        for model, device, os_version in sorted(fixed_errors):
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"β’ {model} on {device} ({os_version})",
                    },
                }
            )

    if new_errors:
        slack_blocks["blocks"].extend(
            [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Failed Configurations That Override Previous Successes*",
                    },
                }
            ]
        )
        for model, device, os_version in sorted(new_errors):
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"β’ {model} on {device} ({os_version})",
                    },
                }
            )

    if new_configs:
        slack_blocks["blocks"].extend(
            [
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": "*Newly Tested Configurations*",
                    },
                }
            ]
        )
        for model, device, os_version in sorted(new_configs):
            slack_blocks["blocks"].append(
                {
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"β’ {model} on {device} ({os_version})",
                    },
                }
            )

    # Device-coverage regression alert (>10% fewer devices than last run).
    if needs_alert:
        slack_blocks["blocks"].append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": "β οΈ *ALERT:* Current device count is less than 90% of previous version's device count, test on more devices before updating the benchmark website!",
                },
            }
        )

    # Emit step outputs for GitHub Actions; the multi-line JSON payload uses
    # the heredoc (name<<EOF ... EOF) syntax.
    github_output = os.getenv("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a") as f:
            f.write("slack_message_payload<<EOF\n")
            json.dump(slack_blocks, f, indent=2)
            f.write("\nEOF\n")

        with open(github_output, "a") as f:
            escaped_text = escape_string(performance_text)
            print(f"performance_message={escaped_text}", file=f)
|
|
|
|
|
# Script entry point: build and emit the Slack report when run directly.
if __name__ == "__main__":
    generate_report()
|
|