from huggingface_hub import HfApi
import pandas as pd
import os
import streamlit as st
import altair as alt
import numpy as np
import datetime
from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
from huggingface_hub import Repository
# Stats are snapshotted once per ISO week: the data file name is stamped
# with the current week and year, inside the cloned dataset repo's "data" dir.
today = datetime.date.today()
iso = today.isocalendar()
year = iso[0]
week = iso[1]
DATASET_REPO_URL = "https://huggingface.co/datasets/patrickvonplaten/model-archs-downloads-space-data"
DATA_FILENAME = f"data_{week}_{year}.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
def retrieve_model_stats():
    """Collect per-architecture download statistics from the Hugging Face Hub.

    For every architecture registered in ``CONFIG_MAPPING_NAMES``, gathers the
    number of hosted models, total downloads, downloads per model, and the
    share of all downloads, then renders the result as CSV text sorted by
    download share (descending).

    Returns:
        str: CSV text whose first header cell is ``model_names`` (labelling
        the otherwise-empty index column produced by ``DataFrame.to_csv``).
    """
    hf_api = HfApi()
    all_stats = {}
    total_downloads = 0
    for model_name in CONFIG_MAPPING_NAMES:
        model_stats = {"num_downloads": 0, "%_of_all_downloads": 0, "num_models": 0, "download_per_model": 0}
        # list(...) is required: recent huggingface_hub versions return a
        # generator from list_models, so len() would raise otherwise.
        models = list(hf_api.list_models(filter=model_name))
        model_stats["num_models"] = len(models)
        # Some entries may lack a downloads attribute; skip them.
        model_stats["num_downloads"] = sum(m.downloads for m in models if hasattr(m, "downloads"))
        if models:
            model_stats["download_per_model"] = round(model_stats["num_downloads"] / len(models), 2)
        total_downloads += model_stats["num_downloads"]
        # save in overall dict
        all_stats[model_name] = model_stats
    for model_name in CONFIG_MAPPING_NAMES:
        # Guard against a zero grand total (e.g. the API returned no
        # download counts at all) — the share then stays at its 0 default.
        if total_downloads > 0:
            all_stats[model_name]["%_of_all_downloads"] = round(all_stats[model_name]["num_downloads"] / total_downloads, 5) * 100  # noqa: E501
        downloads = all_stats[model_name]["num_downloads"]
        # Pretty-print the raw count with thousands separators for the table.
        all_stats[model_name]["num_downloads"] = f"{downloads:,}"
    sorted_results = dict(sorted(all_stats.items(), key=lambda d: d[1]["%_of_all_downloads"], reverse=True))
    dataframe = pd.DataFrame.from_dict(sorted_results, orient="index")
    # give header to model names
    return "model_names" + dataframe.to_csv()
# Clone (or reuse) the dataset repo locally; stats live under ./data.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL)
# Only compute and push a snapshot once per week: if this week's file already
# exists in the clone, skip the (slow) Hub API crawl entirely.
# NOTE: the original re-checked os.path.isfile(DATA_FILE) a second time inside
# this branch; that inner check could never differ from the outer one and was removed.
if not os.path.isfile(DATA_FILE):
    print("Create datafile...")
    result = retrieve_model_stats()
    with open(DATA_FILE, "w") as f:
        f.write(result)
    # Publish the new weekly snapshot back to the dataset repo on the Hub.
    commit_url = repo.push_to_hub()
    print(commit_url)
# Load this week's snapshot. Pass the open handle to read_csv — the original
# opened the file and then ignored the handle, re-opening DATA_FILE by path.
with open(DATA_FILE, "r") as f:
    dataframe = pd.read_csv(f)
# "num_downloads" was serialized with thousands separators (e.g. "1,234");
# strip the commas to recover integer counts for charting.
int_downloads = np.array([int(x.replace(",", "")) for x in dataframe["num_downloads"].values])
st.title(f"Transformers stats for year {year} and week {week}")


def _downloads_bar_chart(counts, names):
    """Build a bar chart of download counts per architecture, keeping input order."""
    source = pd.DataFrame({
        'Number of total downloads': counts,
        'Model architecture name': names,
    })
    return alt.Chart(source).mark_bar().encode(
        y="Number of total downloads",
        # sort=None keeps the DataFrame order (already sorted by share).
        x=alt.X("Model architecture name", sort=None),
    )


# print top 20 downloads
st.title("Top 20 downloads last 30 days")
st.altair_chart(
    _downloads_bar_chart(int_downloads[:20], dataframe["model_names"].values[:20]),
    use_container_width=True,
)
# print bottom 20 downloads
st.title("Bottom 20 downloads last 30 days")
st.altair_chart(
    _downloads_bar_chart(int_downloads[-20:], dataframe["model_names"].values[-20:]),
    use_container_width=True,
)
# print all stats
st.title("All stats last 30 days")
st.table(dataframe)