pteb-leaderboard / utils.py
tabedini's picture
Create utils.py
61f1e83 verified
raw
history blame
6.1 kB
import json
import os
from datetime import datetime
import gradio as gr
import pandas as pd
from envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
# Stylesheet kept as a single string. It imports the Vazirmatn web font from
# Google Fonts and applies it to the page body and common Gradio widgets, then
# styles the leaderboard table (centered cells, left-aligned first column),
# the search bar, tab buttons, the logo element and the filter controls.
# Presumably passed to the Gradio app as its custom CSS -- the consumer is not
# visible in this file; confirm against the caller.
# NOTE(review): `padding: 0.5;` under #filter-columns-type and
# #filter-columns-size has no unit; CSS lengths other than 0 need one, so
# browsers are expected to drop these declarations -- confirm the intended value.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn&display=swap');
body, .gradio-container, .gr-button, .gr-input, .gr-slider, .gr-dropdown, .gr-markdown {
font-family: 'Vazirmatn', sans-serif !important;
}
.markdown-text {
font-size: 16px !important;
}
#models-to-add-text {
font-size: 18px !important;
}
#citation-button span {
font-size: 16px !important;
}
#citation-button textarea {
font-size: 16px !important;
}
#citation-button > label > button {
margin: 6px;
transform: scale(1.3);
}
#leaderboard-table {
margin-top: 15px;
text-align: center;
}
#leaderboard-table,
#leaderboard-table th,
#leaderboard-table td {
text-align: center;
vertical-align: middle;
border-collapse: collapse;
}
#leaderboard-table td:first-child,
#leaderboard-table th:first-child {
text-align: left;
max-width: 600px;
}
table > thead {
white-space: normal;
}
table > thead th,
table > tbody td {
text-align: center;
vertical-align: middle;
}
table > tbody td:first-child {
text-align: left;
max-width: 600px;
}
#leaderboard-table-lite {
margin-top: 15px;
}
#search-bar-table-box > div:first-child {
background: none;
border: none;
}
#search-bar {
padding: 0px;
}
.tab-buttons button {
font-size: 20px;
}
#scale-logo {
border-style: none !important;
box-shadow: none;
display: block;
margin-left: auto;
margin-right: auto;
max-width: 600px;
}
#scale-logo .download {
display: none;
}
#filter_type {
border: 0;
padding-left: 0;
padding-top: 0;
}
#filter_type label {
display: flex;
}
#filter_type label > span {
margin-top: var(--spacing-lg);
margin-right: 0.5em;
}
#filter_type label > .wrap {
width: 103px;
}
#filter_type label > .wrap .wrap-inner {
padding: 2px;
}
#filter_type label > .wrap .wrap-inner input {
width: 1px;
}
#filter-columns-type {
border: 0;
padding: 0.5;
}
#filter-columns-size {
border: 0;
padding: 0.5;
}
#box-filter > .form {
border: 0;
}
"""
# Markdown title for the benchmark. A plain string literal is used because the
# text has no placeholders to interpolate, so an f-prefix would be misleading.
ABOUT_TEXT = """
# Persian Text Embedding Benchmark (v1.0.0)
"""
# Markdown text with the step-by-step instructions for submitting a model:
# a heading followed by a block quote listing the four steps.
SUBMIT_TEXT = """## Submitting a Model for Evaluation
> To submit your open-source model for evaluation, follow these steps:
>
> 1. **Ensure your model is on Hugging Face**: Your model must be publicly available on [Hugging Face](https://huggingface.co/).
>
> 2. **Submit Request**: Send a request with your model's Hugging Face identifier.
>
> 3. **Manual Queue**: Please note that the evaluation process is currently handled manually. Submissions will be queued and processed as soon as possible.
>
> 4. **Results**: Once the evaluation is complete, your model’s results will be updated on the leaderboard.
>
> We appreciate your patience and contributions to the Persian LM ecosystem!
"""
# HTML snippet: a horizontally centered image (30% width) loaded from a GitHub
# avatar URL, followed by a "Part DP AI" heading.
PART_LOGO = """
<img src="https://avatars.githubusercontent.com/u/39557177?v=4" style="width:30%;display:block;margin-left:auto;margin-right:auto">
<h1 style="font-size: 28px; margin-bottom: 2px;">Part DP AI</h1>
"""
def load_jsonl(input_file):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        input_file: Path of a file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(input_file, "r", encoding="utf-8") as f:
        # Blank lines (for example an extra newline at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]
def jsonl_to_dataframe(input_file):
    """Load a JSON Lines file and return its records as a pandas DataFrame.

    The file is parsed with ``load_jsonl`` and the resulting list of records
    is handed directly to the ``pd.DataFrame`` constructor.
    """
    return pd.DataFrame(load_jsonl(input_file))
def sort_dataframe_by_column(df, column_name):
    """Return a copy of ``df`` sorted by ``column_name`` in descending order.

    The returned frame gets a fresh 0..n-1 index; the input frame is not
    modified.

    Raises:
        ValueError: If ``column_name`` is not one of the frame's columns.
    """
    if column_name in df.columns:
        ordered = df.sort_values(by=column_name, ascending=False)
        # Drop the old index so row labels follow the new ranking.
        return ordered.reset_index(drop=True)
    raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")
def add_average_column_to_df(df, columns_to_average, index=3, average_column_name="Average Accuracy"):
    """Insert a column holding the row-wise mean of the selected columns.

    Args:
        df: DataFrame to extend; it is modified in place.
        columns_to_average: Column labels whose values are averaged per row.
        index: Position at which the new column is inserted.
        average_column_name: Label given to the new column.

    Returns:
        The same ``df`` object, now containing the extra column.
    """
    df.insert(
        loc=index,
        column=average_column_name,
        value=df[columns_to_average].mean(axis=1),
    )
    return df
def model_hyperlink(link, model_name):
    """Build an HTML anchor that opens ``link`` in a new tab.

    Both values are HTML-escaped before being placed in the markup, so a name
    or URL containing ``<``, ``>``, ``&`` or quotes cannot break out of the
    attribute or inject tags. Values without such characters produce exactly
    the same markup as plain interpolation.

    Args:
        link: Target URL for the ``href`` attribute.
        model_name: Text shown as the link label.

    Returns:
        The anchor element as an HTML string.
    """
    # Imported here so the function does not depend on a module-level import.
    from html import escape

    safe_link = escape(str(link), quote=True)
    safe_name = escape(str(model_name), quote=True)
    return f'<a target="_blank" href="{safe_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{safe_name}</a>'
def make_clickable_model(model_name):
    """Return an HTML link to the model's page on huggingface.co.

    The URL is ``https://huggingface.co/`` followed by ``model_name``, and the
    link label is ``model_name`` itself.
    """
    return model_hyperlink(f"https://huggingface.co/{model_name}", model_name)
def center_align_markdown(text):
    """Wrap ``text`` in a ``<div align="center">`` element and return the HTML."""
    template = '<div align="center">{}</div>'
    return template.format(text)
def apply_markdown_format_for_columns(df, model_column_name):
    """Turn the model-name column into clickable links.

    Every value in ``model_column_name`` is replaced by the HTML produced by
    ``make_clickable_model``. All other columns are left as-is.

    Args:
        df: DataFrame to update; it is modified in place.
        model_column_name: Label of the column holding model identifiers.

    Returns:
        The same ``df`` object with the model column rewritten.
    """
    df[model_column_name] = df[model_column_name].apply(make_clickable_model)
    return df
def submit(model_name, model_id, contact_email):
    """Queue an evaluation request for a model.

    The request is written as a JSON file under
    ``EVAL_REQUESTS_PATH/<user_name>/`` and uploaded to the ``QUEUE_REPO``
    dataset repository. The local file is removed afterwards, whether or not
    the upload succeeded.

    Args:
        model_name: Display name of the model.
        model_id: Identifier in ``<user_name>/<model_name>`` form.
        contact_email: Contact address stored with the request.

    Returns:
        None. Validation problems and success are reported with ``gr.Info``.

    Raises:
        gr.Error: If writing or uploading the request file fails. The
            exception has to be raised (not merely constructed) for Gradio to
            show it to the user.
    """
    if model_name == "" or model_id == "" or contact_email == "":
        gr.Info("Please fill all the fields")
        return

    # The id is used to build a filesystem path, so require exactly
    # "<user>/<model>" and reject parts that could escape the queue directory.
    user_name, _, model_path = model_id.partition("/")
    invalid_parts = any(
        part in ("", ".", "..") or "/" in part or "\\" in part
        for part in (user_name, model_path)
    )
    if invalid_parts:
        gr.Info("Model ID must have the form 'user_name/model_name'")
        return

    eval_entry = {
        "model_name": model_name,
        "model_id": model_id,
        "contact_email": contact_email,
    }

    # The timestamp keeps repeated submissions of one model from colliding.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
    out_path = f"{out_dir}/{user_name}_{model_path}_{timestamp}.json"

    try:
        os.makedirs(out_dir, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(eval_entry, f)

        print("Uploading eval file")
        API.upload_file(
            path_or_fileobj=out_path,
            # Assumes EVAL_REQUESTS_PATH contains an "eval-queue/" segment;
            # if it does not, the IndexError is reported as gr.Error below.
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )
    except Exception as e:
        raise gr.Error(f"Error submitting the model: {e}") from e
    finally:
        # Remove the local file even when the upload failed.
        if os.path.exists(out_path):
            os.remove(out_path)

    gr.Info("Successfully submitted", duration=10)