The-Arabic-Rag-Leaderboard

Running on CPU Upgrade

The-Arabic-Rag-Leaderboard / retrieval_leaderboard.py

Update SDK version and refactor leaderboard tabs for improved structure

b85d9b0 5 months ago

3.37 kB

	from pathlib import Path

	from leaderboard_tab import (
	create_leaderboard_tab,
	search_leaderboard,
	update_columns_to_show,
	)
	from utils import load_json_results

	# Constants
	#
	# RETRIEVAL_ABOUT_SECTION is the Markdown-style "about" text for the retrieval
	# leaderboard. create_retrieval_tab() hands it to create_leaderboard_tab() as
	# the `about_section` argument; how it is rendered is decided there.
	#
	# NOTE(review): the text only describes "Web Search Dataset" metrics and calls
	# the aggregate "Overall Score", while the default columns in
	# create_retrieval_tab() use "Average Score" and also list
	# "Islamic Knowledge Dataset" - confirm the wording is still current.
	RETRIEVAL_ABOUT_SECTION = """
	## About Retrieval Evaluation

	The retrieval evaluation assesses a model's ability to find and retrieve relevant information from a large corpus of Arabic text. Models are evaluated on:

	### Web Search Dataset Metrics
	- MRR (Mean Reciprocal Rank): Measures the ranking quality by focusing on the position of the first relevant result
	- nDCG (Normalized Discounted Cumulative Gain): Evaluates the ranking quality considering all relevant results
	- Recall@5: Measures the proportion of relevant documents found in the top 5 results
	- Overall Score: Combined score calculated as the average of MRR, nDCG, and Recall@5

	### Model Requirements
	- Must support Arabic text embeddings
	- Should handle queries of at least 512 tokens
	- Must work with `sentence-transformers` library

	### Evaluation Process
	1. Models process Arabic web search queries
	2. Retrieved documents are evaluated using:
	- MRR for first relevant result positioning
	- nDCG for overall ranking quality
	- Recall@5 for top results accuracy
	3. Metrics are averaged to calculate the overall score
	4. Models are ranked based on their overall performance

	### How to Prepare Your Model
	- Ensure your model is publicly available on HuggingFace Hub (We don't support private model evaluations yet)
	- Model should output fixed-dimension embeddings for text
	- Support batch processing for efficient evaluation (this is default if you use `sentence-transformers`)
	"""

	# Global variables
	#
	# Module-level cache of the retrieval results table. It stays None until
	# load_retrieval_leaderboard() assigns it; the search and column-update
	# callbacks below read it directly, so it must be populated (e.g. through
	# create_retrieval_tab()) before those callbacks run.
	retrieval_df = None


	def load_retrieval_leaderboard():
	    """Read the retrieval results file, add a rank column, and cache the table.

	    The data comes from ``results/retrieval_results.json`` located next to
	    this module and is loaded through ``load_json_results`` with the
	    "Revision" and "Task" columns dropped. A 1-based "Rank" column is then
	    inserted at position 0, numbering rows in the order the loader returned
	    them.

	    NOTE(review): the ``True`` / "Average Score" arguments presumably ask the
	    loader to sort by average score - confirm against
	    ``utils.load_json_results``.

	    Returns:
	        The loaded table, which is also stored in the module-level
	        ``retrieval_df``.
	    """
	    global retrieval_df

	    results_file = Path(__file__).parent / "results" / "retrieval_results.json"

	    # Assign the global first so the cached table exists even before ranking.
	    retrieval_df = load_json_results(
	        results_file,
	        True,
	        "Average Score",
	        drop_cols=["Revision", "Task"],
	    )

	    row_count = len(retrieval_df)
	    retrieval_df.insert(0, "Rank", range(1, 1 + row_count))

	    return retrieval_df


	def retrieval_search_leaderboard(model_name, columns_to_show):
	    """Run ``search_leaderboard`` against the cached retrieval table.

	    Args:
	        model_name: Search text forwarded unchanged to ``search_leaderboard``.
	        columns_to_show: Column selection forwarded unchanged.

	    Returns:
	        Whatever ``search_leaderboard`` returns for the module-level
	        ``retrieval_df``.
	    """
	    search_result = search_leaderboard(retrieval_df, model_name, columns_to_show)
	    return search_result


	def update_retrieval_columns_to_show(columns_to_show):
	    """Apply a new column selection to the cached retrieval table.

	    Args:
	        columns_to_show: Column selection forwarded unchanged to
	            ``update_columns_to_show``.

	    Returns:
	        Whatever ``update_columns_to_show`` returns for the module-level
	        ``retrieval_df``.
	    """
	    updated_view = update_columns_to_show(retrieval_df, columns_to_show)
	    return updated_view


	def create_retrieval_tab():
	    """Build the retrieval leaderboard tab via ``create_leaderboard_tab``.

	    Loads the retrieval results on first use (when the module-level
	    ``retrieval_df`` is still ``None``), then passes the table, the default
	    visible columns, the search/update callbacks, the about text, and the
	    task type "Retriever" to ``create_leaderboard_tab``.

	    Returns:
	        Whatever ``create_leaderboard_tab`` returns.
	    """
	    global retrieval_df

	    # Lazy load: only read the results file the first time the tab is built.
	    if retrieval_df is None:
	        retrieval_df = load_retrieval_leaderboard()

	    # Columns displayed before the user changes the selection.
	    visible_by_default = [
	        "Rank",
	        "Model",
	        "Average Score",
	        "Model Size (MB)",
	        "Context Length",
	        "Embedding Dimension",
	        "Web Search Dataset",
	        "Islamic Knowledge Dataset",
	    ]

	    tab_settings = {
	        "df": retrieval_df,
	        "initial_columns_to_show": visible_by_default,
	        "search_function": retrieval_search_leaderboard,
	        "update_function": update_retrieval_columns_to_show,
	        "about_section": RETRIEVAL_ABOUT_SECTION,
	        "task_type": "Retriever",
	    }
	    return create_leaderboard_tab(**tab_settings)