Spaces:
Running
Running
File size: 6,228 Bytes
541cf85 0440741 0e06db3 0440741 0e06db3 0440741 0e06db3 0440741 541cf85 0e06db3 21f1468 0e06db3 541cf85 4ecf403 541cf85 4ecf403 993709c 541cf85 4ecf403 541cf85 4ecf403 25351e8 4ecf403 25351e8 4ecf403 f36fa2e 4ecf403 f36fa2e 4ecf403 f36fa2e 4ecf403 f36fa2e 4ecf403 993709c 0e06db3 4ecf403 0e06db3 4ecf403 0e06db3 4ecf403 0440741 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import pandas as pd
import re
from huggingface_hub import HfApi
# Shared Hugging Face Hub client, created once at import time; get_model_size
# uses it to fetch model metadata.
api = HfApi()
# Parameter-count token inside a lowercased repo id, e.g. "7b", "1.8b",
# "13.5b" or "350m". The integer part may have several digits.
_SIZE_PATTERN = re.compile(r"(\d+\.)?\d+(b|m)")


def _safetensors_param_count(model_info):
    """Return the total parameter count from safetensors metadata, or None."""
    metadata = getattr(model_info, "safetensors", None)
    if metadata is None:
        return None
    # NOTE(review): depending on the installed huggingface_hub version the
    # metadata is either an object exposing `.total` or a plain dict with a
    # "total" key -- both shapes are accepted here; confirm against the
    # pinned version.
    total = getattr(metadata, "total", None)
    if total is None:
        try:
            total = metadata["total"]
        except (TypeError, KeyError):
            return None
    return total


def get_model_size(model_name, precision: str = "BF16", revision: str = "main"):
    """Return the size of a Hub model in billions of parameters.

    The size is taken from the repository's safetensors metadata when it is
    available; otherwise it is guessed from a "<number>b" / "<number>m" token
    in the repository id.

    Args:
        model_name: Model identifier. Names without a "/" (no "org/" prefix)
            are not looked up on the Hub.
        precision: Precision label; "GPTQ" triggers the size multiplier below.
        revision: Repository revision passed to the Hub lookup.

    Returns:
        None if ``model_name`` contains no "/"; 0 if the size cannot be
        determined; otherwise the size in billions, rounded to one decimal
        before the GPTQ multiplier is applied.
    """
    if len(model_name.split("/")) == 1:
        return None

    model_info = api.model_info(repo_id=model_name, revision=revision)

    total_params = _safetensors_param_count(model_info)
    if isinstance(total_params, (int, float)):
        model_size = round(total_params / 1e9, 1)
    else:
        try:
            # AttributeError covers both "no size token in the id" (search
            # returned None) and a missing/None modelId.
            size_token = _SIZE_PATTERN.search(model_info.modelId.lower()).group(0)
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
        magnitude = float(size_token[:-1])
        # A trailing "m" means millions; convert to billions.
        model_size = round(magnitude if size_token[-1] == "b" else magnitude / 1e3, 1)

    # GPTQ checkpoints are scaled by 8 -- presumably because the stored
    # tensors pack several quantized weights per element; TODO confirm.
    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
    return size_factor * model_size
def make_clickable_model(model_name, link=None):
    """Return an HTML anchor for a model, or the bare name if no link applies.

    Args:
        model_name: Model identifier; only the part after the last "/" is
            shown as the link text.
        link: Optional explicit URL. When omitted, a Hugging Face URL is
            derived for identifiers of the form "org/name".

    Returns:
        An ``<a>`` tag string when a link is given or can be derived,
        otherwise ``model_name`` unchanged.
    """
    if link is None:
        # Only "org/name" identifiers can be mapped to a Hub page.
        if len(model_name.split("/")) != 2:
            return model_name
        link = "https://huggingface.co/" + model_name

    display_name = model_name.split("/")[-1]
    return (
        f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{display_name}</a>'
    )
# Per-language and average score columns shared by the SeaExam and SeaBench tables.
_SCORE_COLUMNS = ['id-pub', 'th-pub', 'vi-pub', 'avg-pub', 'id-prv', 'th-prv', 'vi-prv', 'avg-prv']

# Display labels for the 'type' column; values not listed here map to NaN.
_TYPE_LABELS = {'base': '🟢 base', 'chat': '🔶 chat'}


def _finalize_leaderboard(frame, rank_by, column_order, size_of):
    """Rank, reorder, sort and decorate one leaderboard table for display."""
    # method='min' gives tied models the same best rank; the default
    # 'average' yields fractional ranks that astype(int) would truncate.
    frame['R'] = frame[rank_by].rank(ascending=False, method='min').astype(int)
    frame = frame[column_order].sort_values(by=rank_by, ascending=False)
    # Mark the sort column in the header.
    frame = frame.rename(columns={rank_by: f'{rank_by} ⬇️'})
    frame['type'] = frame['type'].map(_TYPE_LABELS)
    # Sizes must be computed before 'Model' is turned into HTML.
    frame['#P(B)'] = frame['Model'].apply(size_of)
    frame['Model'] = frame['Model'].apply(make_clickable_model)
    return frame


def load_data(data_path):
    """Load the results CSV and build the three leaderboard tables.

    The first line of the file is skipped and the second is read as the
    header; columns are then selected by position and renamed:
    columns 0-3 are model metadata, 4-11 the SeaExam scores and 12-19 the
    SeaBench scores (columns 7/11 and 15/19 are the respective averages).

    Args:
        data_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A tuple ``(df_seaexam, df_seabench, df_overall)`` of DataFrames, each
        sorted by its private-set score in descending order, with a rank
        column 'R', a '#P(B)' model-size column and an HTML 'Model' column.
    """
    df = pd.read_csv(data_path, skiprows=1, header=0)

    columns = ['Model', 'type', 'open?', 'shot'] + _SCORE_COLUMNS
    columns_sorted = ['R', 'Model', 'type', 'open?', 'avg-pub', 'avg-prv', 'id-pub', 'th-pub', 'vi-pub', 'id-prv', 'th-prv', 'vi-prv']
    overall_scores = ['SeaExam-pub', 'SeaExam-prv', 'SeaBench-pub', 'SeaBench-prv']
    columns_overall = ['Model', 'type', 'open?', 'shot'] + overall_scores
    columns_overall_sorted = ['R'] + columns_overall

    # Split by column position into SeaExam, SeaBench and overall tables.
    # .copy() makes each table independent of `df`, so the in-place edits
    # below never act on a view of the raw data.
    df_seaexam = df.iloc[:, :12].copy()
    df_seabench = df.iloc[:, [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19]].copy()
    df_overall = df.iloc[:, [0, 1, 2, 3, 7, 11, 15, 19]].copy()
    df_seaexam.columns = columns
    df_seabench.columns = columns
    df_overall.columns = columns_overall

    # Drop models with any missing score (overall: only SeaExam is required).
    df_seaexam = df_seaexam.dropna(subset=_SCORE_COLUMNS).copy()
    df_seabench = df_seabench.dropna(subset=_SCORE_COLUMNS).copy()
    df_overall = df_overall.dropna(subset=['SeaExam-pub', 'SeaExam-prv']).copy()

    # SeaExam scores are scaled by 100 (presumably stored as fractions);
    # SeaBench scores are kept as-is. Everything is shown with 2 decimals.
    df_seaexam[_SCORE_COLUMNS] *= 100
    df_seaexam[_SCORE_COLUMNS] = df_seaexam[_SCORE_COLUMNS].round(2)
    df_seabench[_SCORE_COLUMNS] = df_seabench[_SCORE_COLUMNS].round(2)
    df_overall[['SeaExam-pub', 'SeaExam-prv']] *= 100
    df_overall[overall_scores] = df_overall[overall_scores].round(2)

    # get_model_size queries the Hub; the same models appear in all three
    # tables, so look each one up only once.
    size_cache = {}

    def cached_model_size(model_name):
        if model_name not in size_cache:
            size_cache[model_name] = get_model_size(model_name)
        return size_cache[model_name]

    df_seaexam = _finalize_leaderboard(df_seaexam, 'avg-prv', columns_sorted, cached_model_size)
    df_seabench = _finalize_leaderboard(df_seabench, 'avg-prv', columns_sorted, cached_model_size)
    df_overall = _finalize_leaderboard(df_overall, 'SeaExam-prv', columns_overall_sorted, cached_model_size)

    return df_seaexam, df_seabench, df_overall
if __name__ == "__main__":
    # Manual smoke test: look up one known model on the Hub and print its size.
    print(get_model_size("SeaLLMs/SeaLLM-7B-v2"))