IPA-Transcription-EN

Running

App Files Files Community

IPA-Transcription-EN / app.py

arunasrivastava

it worked! mostly

a2c34b1 5 months ago

raw

history blame

6.8 kB


	import html
	import json
	from datetime import datetime, timezone
	from pathlib import Path

	import gradio as gr
	import pandas as pd

	# Date string rendered as "Last updated: ..." at the bottom of the UI.
	LAST_UPDATED = "Dec 4th 2024"

	# Directory that holds leaderboard.json, results.json and the queued
	# request_*.json files.
	# NOTE(review): this is an absolute path under one user's home directory;
	# presumably it must be adjusted on any other machine -- confirm before deploying.
	QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")

	# Relative path to the current working directory; not referenced by the
	# code visible in this file.
	APP_DIR = Path("./")

	# Internal column key -> header text used for the leaderboard table
	# (phonemic transcription metrics).
	column_names = {
	    "MODEL": "Model",
	    "SUBMISSION_NAME": "Submission Name",
	    "AVG_PER": "Average PER ⬇️",
	    "AVG_PFER": "Average PFER ⬇️",
	    "SUBSET": "Dataset Subset",
	    "GITHUB_URL": "GitHub",
	    "DATE": "Submission Date",
	}

	def load_leaderboard_data():
	    """Load the leaderboard entries from ``QUEUE_DIR / "leaderboard.json"``.

	    Returns:
	        A ``pandas.DataFrame`` built from the parsed JSON. An empty
	        DataFrame is returned when the file does not exist, cannot be read
	        or parsed, or cannot be converted to a DataFrame. Failures are
	        reported with ``print`` and never propagate to the caller.
	    """
	    leaderboard_path = QUEUE_DIR / "leaderboard.json"

	    try:
	        # Open directly rather than testing exists() first, so there is no
	        # gap between the check and the read. JSON is UTF-8 by
	        # specification, so do not rely on the locale's default encoding.
	        with open(leaderboard_path, "r", encoding="utf-8") as f:
	            data = json.load(f)
	        return pd.DataFrame(data)
	    except FileNotFoundError:
	        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
	        return pd.DataFrame()
	    except Exception as e:
	        # Deliberate best-effort boundary: the UI shows an empty table
	        # instead of failing when the file is unusable.
	        print(f"Error loading leaderboard data: {e}")
	        return pd.DataFrame()

	def format_leaderboard_df(df):
	    """Prepare raw leaderboard rows for display.

	    Renames the raw field names to the upper-case column keys, orders the
	    rows by ascending PER, renders both metrics with four decimals and
	    wraps the GitHub URL in an HTML anchor.

	    Args:
	        df: DataFrame using the raw field names ``model``,
	            ``submission_name``, ``average_per``, ``average_pfer``,
	            ``subset``, ``github_url`` and ``submission_date``. The two
	            metric columns and ``github_url`` are required when the frame
	            is not empty. The input is not modified.

	    Returns:
	        A new, formatted DataFrame, or ``df`` itself when it is empty.

	    Raises:
	        KeyError: if a required column is missing from a non-empty frame.
	    """
	    if df.empty:
	        return df

	    display_df = df.rename(columns={
	        "model": "MODEL",
	        "submission_name": "SUBMISSION_NAME",
	        "average_per": "AVG_PER",
	        "average_pfer": "AVG_PFER",
	        "subset": "SUBSET",
	        "github_url": "GITHUB_URL",
	        "submission_date": "DATE",
	    })

	    # Sort while the metric is still numeric. Sorting after formatting would
	    # compare strings, where "10.0000" orders before "9.5000".
	    display_df = display_df.sort_values(by="AVG_PER")

	    for metric in ("AVG_PER", "AVG_PFER"):
	        display_df[metric] = display_df[metric].apply(lambda x: f"{x:.4f}")

	    def _github_link(url):
	        # Missing values (None, NaN, empty string) get a placeholder.
	        if not isinstance(url, str) or not url:
	            return "N/A"
	        # The URL is user supplied: escape it so it cannot break out of the
	        # href attribute.
	        safe_url = html.escape(url, quote=True)
	        return f'<a href="{safe_url}" target="_blank">Repository</a>'

	    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(_github_link)

	    return display_df

	def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
	    """Queue an evaluation request as a JSON file inside ``QUEUE_DIR``.

	    Args:
	        model_name: Identifier of the transcription model; required.
	        submission_name: Display name of the submission; required.
	        github_url: Optional repository URL; stored as "" when not given.
	        subset: Dataset subset name written into the request.
	        max_samples: Sample limit written into the request.

	    Returns:
	        A ``gr.Markdown`` component carrying a validation warning, a success
	        message, or the error text if the request file could not be written.
	        Exceptions raised while writing are reported this way, not raised.
	    """
	    # Surrounding whitespace is stripped first so that a whitespace-only
	    # entry is rejected instead of being queued as a blank name.
	    model_name = str(model_name).strip() if model_name else ""
	    submission_name = str(submission_name).strip() if submission_name else ""
	    github_url = str(github_url).strip() if github_url else ""

	    if not model_name or not submission_name:
	        return gr.Markdown("⚠️ Please provide both model name and submission name.")

	    request_data = {
	        "transcription_model": model_name,
	        "subset": subset,
	        "max_samples": max_samples,
	        "submission_name": submission_name,
	        "github_url": github_url,
	    }

	    try:
	        # Ensure queue directory exists
	        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

	        # UTC timestamp makes the file name unique; ":" is replaced with "-"
	        # in the ISO string before it is used as part of the file name.
	        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
	        request_file = QUEUE_DIR / f"request_{timestamp}.json"

	        with open(request_file, "w", encoding="utf-8") as f:
	            json.dump(request_data, f, indent=2)
	    except Exception as e:
	        # UI boundary: report the failure to the user instead of raising.
	        return gr.Markdown(f"❌ Error submitting request: {str(e)}")

	    return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")

	def load_results_for_model(model_name):
	    """Return the newest result entry for ``model_name``.

	    Reads ``QUEUE_DIR / "results.json"``, keeps the entries whose ``"model"``
	    field equals ``model_name`` and returns the one with the greatest
	    ``"timestamp"`` value.

	    Args:
	        model_name: Value compared against each entry's ``"model"`` field.

	    Returns:
	        The matching entry (a dict), or ``None`` when nothing matches or the
	        file cannot be read or parsed. Errors are reported with ``print``
	        and never propagate to the caller.
	    """
	    results_path = QUEUE_DIR / "results.json"
	    try:
	        # JSON is UTF-8 by specification; do not rely on the locale default.
	        with open(results_path, "r", encoding="utf-8") as f:
	            results = json.load(f)

	        # Ignore entries that are not objects or carry no "model" field, so
	        # a single malformed record cannot hide every valid result.
	        model_results = [
	            r for r in results
	            if isinstance(r, dict) and "model" in r and r["model"] == model_name
	        ]
	        if not model_results:
	            return None

	        # Get the most recent result
	        return max(model_results, key=lambda r: r["timestamp"])
	    except Exception as e:
	        # Deliberate best-effort boundary for the UI.
	        print(f"Error loading results: {e}")
	        return None

	# Create Gradio interface
	# Three tabs: the leaderboard table, a submission form that queues an
	# evaluation request, and a lookup of the latest detailed result for a model.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
	    # The literal below stays at its original margin so its text is unchanged.
	    gr.Markdown("""
	Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.

	Metrics:
	- PER (Phoneme Error Rate): Measures the edit distance between predicted and ground truth phonemes (lower is better)
	- PFER (Phoneme Frame Error Rate): Measures frame-level phoneme prediction accuracy (lower is better)
	""")

	    with gr.Tabs() as tabs:
	        with gr.TabItem("🏆 Leaderboard"):
	            # Initial table contents are loaded once, when the UI is built.
	            leaderboard_df = load_leaderboard_data()
	            formatted_df = format_leaderboard_df(leaderboard_df)

	            leaderboard_table = gr.DataFrame(
	                value=formatted_df,
	                interactive=False,
	                headers=list(column_names.values())
	            )

	            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
	            # `outputs` routes the reloaded frame back into the table; without
	            # it the handler's return value is not applied to any component.
	            refresh_btn.click(
	                lambda: gr.DataFrame(value=format_leaderboard_df(load_leaderboard_data())),
	                outputs=leaderboard_table
	            )

	        with gr.TabItem("📝 Submit Model"):
	            with gr.Column():
	                model_input = gr.Textbox(
	                    label="Model Name",
	                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
	                    info="Enter the Hugging Face model ID"
	                )
	                submission_name = gr.Textbox(
	                    label="Submission Name",
	                    placeholder="My Awesome Model v1.0",
	                    info="Give your submission a descriptive name"
	                )
	                github_url = gr.Textbox(
	                    label="GitHub Repository URL (optional)",
	                    placeholder="https://github.com/username/repo",
	                    info="Link to your model's code repository"
	                )

	            submit_btn = gr.Button("🚀 Submit for Evaluation")
	            result_text = gr.Markdown()

	            # Only the three textboxes are passed, so `subset` and
	            # `max_samples` keep their defaults in request_evaluation.
	            submit_btn.click(
	                request_evaluation,
	                inputs=[model_input, submission_name, github_url],
	                outputs=result_text
	            )

	        with gr.TabItem("ℹ️ Detailed Results"):
	            model_selector = gr.Textbox(
	                label="Enter Model Name to View Details",
	                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
	            )
	            view_btn = gr.Button("View Results")
	            results_json = gr.JSON(label="Detailed Results")

	            def show_model_results(model_name):
	                """Return the latest stored result for the model, or an error dict."""
	                results = load_results_for_model(model_name)
	                return results or {"error": "No results found for this model"}

	            view_btn.click(
	                show_model_results,
	                inputs=[model_selector],
	                outputs=[results_json]
	            )

	    gr.Markdown(f"Last updated: {LAST_UPDATED}")

	demo.launch()