# lm-similarity/src/dataloading.py
import datasets
import numpy as np
from huggingface_hub import HfApi
from functools import lru_cache

def get_leaderboard_models():
    # api = HfApi()
    # List all datasets in the open-llm-leaderboard organization
    # datasets = api.list_datasets(author="open-llm-leaderboard")
    models = []
    # for dataset in datasets:
    #     if dataset.id.endswith("-details"):
    #         # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
    #         model_part = dataset.id.split("/")[-1].replace("-details", "")
    #         provider, model = model_part.split("__", 1)
    #         models.append(f"{provider}/{model}")

    # Example models
    models = [
        "meta_llama/Llama-3.2-1B-Instruct",
        "meta_llama/Llama-3.2-3B-Instruct",
        "meta_llama/Llama-3.1-8B-Instruct",
        "meta_llama/Llama-3.1-70B-Instruct",
        "meta_llama/Llama-3.3-70B-Instruct",
    ]
    return sorted(models)

@lru_cache(maxsize=1)
def get_leaderboard_models_cached():
    return get_leaderboard_models()

def get_leaderboard_datasets():
    return [
        "ai2_arc",
        "hellaswag",
        "mmlu_pro",
        "truthful_qa",
        "winogrande",
        "gsm8k",
    ]

def filter_labels(doc):
    """Extract ground-truth labels from a list of leaderboard documents."""
    labels = []
    if "answer_index" in doc[0].keys():
        # Multiple-choice tasks store the index of the correct option
        for d in doc:
            labels.append(int(d["answer_index"]))
    else:
        # Binary tasks store the answer as a "True"/"False" string
        for d in doc:
            if d["answer"] == "False":
                labels.append(0)
            elif d["answer"] == "True":
                labels.append(1)
            else:
                raise ValueError("Invalid label")
    return labels

def load_run_data(model_name, dataset_name):
    try:
        model_name = model_name.replace("/", "__")
        data = datasets.load_dataset(
            "open-llm-leaderboard/" + model_name + "-details",
            name=model_name + "__leaderboard_" + dataset_name,
            split="latest",
        )
        data = data.sort("doc_id")
        data = data.to_dict()

        # Get log probabilities for each response
        log_probs = []
        for resp in data["filtered_resps"]:
            log_prob = np.array([float(option[0]) for option in resp])
            log_probs.append(log_prob)

        # Get ground truth labels
        labels = filter_labels(data["doc"])
    except Exception as e:
        print(e)
        log_probs = []
        labels = []
    return log_probs, labels
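
# Example usage (illustrative sketch only; assumes the corresponding
# "open-llm-leaderboard/<provider>__<model>-details" dataset exists on the
# Hugging Face Hub and exposes a "latest" split for the chosen benchmark):
if __name__ == "__main__":
    model = get_leaderboard_models_cached()[0]
    dataset = get_leaderboard_datasets()[0]
    log_probs, labels = load_run_data(model, dataset)
    print(f"Loaded {len(log_probs)} examples for {model} on {dataset}")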