Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,247 Bytes
167137b 4ae93a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
from typing import Literal
from huggingface_hub import HfFileSystem, hf_hub_download
# Short category keys mapped to the category names shown for them.
# Every value here is also a key of CAT_NAME_TO_EXPLANATION.
KEY_TO_CATEGORY_NAME = {
    # Unfiltered and topic/length based categories.
    "full": "Overall",
    "coding": "Coding",
    "long_user": "Longer Query",
    # Language based categories.
    "english": "English",
    "chinese": "Chinese",
    "french": "French",
    # Categories defined by what they leave out.
    "no_tie": "Exclude Ties",
    "no_short": "Exclude Short Query (< 5 tokens)",
    "no_refusal": "Exclude Refusal",
}
# Category name mapped to a longer, human-readable explanation of that category.
CAT_NAME_TO_EXPLANATION = {
    # Unfiltered and topic/length based categories.
    "Overall": "Overall Questions",
    "Coding": "Coding: whether conversation contains code snippets",
    "Longer Query": "Longer Query (>= 500 tokens)",
    # Language based categories.
    "English": "English Prompts",
    "Chinese": "Chinese Prompts",
    "French": "French Prompts",
    # Categories defined by what they leave out.
    "Exclude Ties": "Exclude Ties and Bothbad",
    "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
    "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
}
# License labels that are treated as proprietary.
PROPRIETARY_LICENSES = ["Proprietary"]
def download_latest_data_from_space(
    repo_id: str, file_type: Literal["pkl", "csv"]
) -> str:
    """
    Download the most recent data file with the given extension from a Space repository.

    Candidate files are found by globbing ``spaces/{repo_id}/*.{file_type}``.
    They are ranked by the text that follows the last underscore in the part
    of the file name before its first dot; the candidate whose token compares
    highest (plain string comparison) is downloaded.

    Args:
        repo_id (str): The ID of the Space repository to look in.
        file_type (Literal["pkl", "csv"]): Extension of the data file to fetch.

    Returns:
        str: The local path returned by ``hf_hub_download`` for the chosen file.

    Raises:
        IndexError: If the glob matches no file.
    """

    def basename(path):
        # Last "/"-separated component of the path.
        return path.rpartition("/")[2]

    def date_token(path):
        # Name up to the first dot, then whatever follows its last underscore.
        stem = basename(path).partition(".")[0]
        return stem.rpartition("_")[2]

    candidates = HfFileSystem().glob(f"spaces/{repo_id}/*.{file_type}")

    # NOTE(review): tokens are compared as strings, so this presumably relies on
    # a date format that sorts chronologically (e.g. YYYYMMDD) - confirm.
    newest_first = sorted(candidates, key=date_token, reverse=True)
    newest = newest_first[0]

    return hf_hub_download(
        repo_id=repo_id,
        filename=basename(newest),
        repo_type="space",
    )
def get_constants(dfs):
"""
Calculate and return the minimum and maximum Elo scores, as well as the maximum number of models per month.
Parameters:
- dfs (dict): A dictionary containing DataFrames for different categories.
Returns:
- min_elo_score (float): The minimum Elo score across all DataFrames.
- max_elo_score (float): The maximum Elo score across all DataFrames.
- upper_models_per_month (int): The maximum number of models per month per license across all DataFrames.
"""
filter_ranges = {}
for k, df in dfs.items():
filter_ranges[k] = {
"min_elo_score": df["rating"].min().round(),
"max_elo_score": df["rating"].max().round(),
"upper_models_per_month": int(
df.groupby(["Month-Year", "License"])["rating"]
.apply(lambda x: x.count())
.max()
),
}
min_elo_score = float("inf")
max_elo_score = float("-inf")
upper_models_per_month = 0
for _, value in filter_ranges.items():
min_elo_score = min(min_elo_score, value["min_elo_score"])
max_elo_score = max(max_elo_score, value["max_elo_score"])
upper_models_per_month = max(
upper_models_per_month, value["upper_models_per_month"]
)
return min_elo_score, max_elo_score, upper_models_per_month
|