|
|
|
|
|
from huggingface_hub import HfApi |
|
import pandas |
|
import os |
|
import streamlit as st |
|
import datetime |
|
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES |
|
|
|
from huggingface_hub import Repository |
|
|
|
# The stats CSV is rotated weekly: its name embeds the current ISO week/year.
today = datetime.date.today()
year, week = today.isocalendar()[:2]

# Hub dataset repo that archives one download-statistics CSV per ISO week.
DATASET_REPO_URL = "https://huggingface.co/datasets/patrickvonplaten/model-archs-downloads-space-data"
DATA_FILENAME = f"data_{week}_{year}.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
|
|
|
|
|
def retrieve_model_stats(limit=2):
    """Collect per-architecture download statistics from the Hugging Face Hub.

    For each model architecture registered in ``CONFIG_MAPPING_NAMES`` (capped
    at ``limit`` architectures), queries the Hub for all models of that type
    and aggregates:

    - ``num_downloads``: total downloads, formatted with thousands separators
    - ``%_of_all_downloads``: share of downloads among the queried architectures
    - ``num_models``: number of Hub models with that architecture
    - ``download_per_model``: average downloads per model (0 if no models)

    Args:
        limit: Number of architectures to query, or ``None`` for all.
            Defaults to 2 (the original hard-coded cap) to keep runs cheap.

    Returns:
        str: CSV rendering of the stats table, sorted by download share
        (descending).
    """
    hf_api = HfApi()
    all_stats = {}
    total_downloads = 0

    for model_name in list(CONFIG_MAPPING_NAMES.keys())[:limit]:
        # list_models may return a lazy iterator on recent huggingface_hub
        # versions, so materialize it before taking len().
        models = list(hf_api.list_models(filter=model_name))

        # Some entries may lack a "downloads" attribute; skip those.
        num_downloads = sum(m.downloads for m in models if hasattr(m, "downloads"))
        all_stats[model_name] = {
            "num_downloads": num_downloads,
            "%_of_all_downloads": 0,
            "num_models": len(models),
            "download_per_model": round(num_downloads / len(models), 2) if models else 0,
        }
        total_downloads += num_downloads

    # Second pass: shares need the grand total, which only exists after the
    # first pass completes.
    for model_stats in all_stats.values():
        downloads = model_stats["num_downloads"]
        # Guard against a zero grand total (e.g. Hub returned no counts),
        # which would otherwise raise ZeroDivisionError.
        if total_downloads:
            model_stats["%_of_all_downloads"] = round(downloads / total_downloads, 5) * 100
        model_stats["num_downloads"] = f"{downloads:,}"

    sorted_results = dict(
        sorted(all_stats.items(), key=lambda item: item[1]["%_of_all_downloads"], reverse=True)
    )
    dataframe = pandas.DataFrame.from_dict(sorted_results, orient="index")

    return dataframe.to_csv()
|
|
|
|
|
# Clone (or reuse) the dataset repo into ./data so this week's CSV can be
# read or created there.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL)

# Only hit the Hub API when this week's snapshot does not exist yet; the
# stats crawl is expensive, so the CSV acts as a weekly cache.
if not os.path.isfile(DATA_FILE):
    print("Create datafile...")
    result = retrieve_model_stats()

    with open(DATA_FILE, "w") as f:
        f.write(result)

    # Persist the new snapshot back to the dataset repo.
    commit_url = repo.push_to_hub()
    print(commit_url)

# read_csv takes the path directly — no need to open a handle ourselves.
dataframe = pandas.read_csv(DATA_FILE)

print(DATA_FILE)
print(dataframe)

st.table(dataframe)
|
|