Spaces:
Runtime error
Runtime error
Commit
·
02b09bd
1
Parent(s):
36ed5fd
add filter option
Browse files
app.py
CHANGED
@@ -1,9 +1,13 @@
|
|
1 |
from huggingface_hub import list_models
|
2 |
-
from
|
3 |
-
from toolz import groupby, valmap
|
4 |
import gradio as gr
|
5 |
from tqdm.auto import tqdm
|
6 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
@cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
|
@@ -24,12 +28,14 @@ def has_base_model_info(model):
|
|
24 |
|
25 |
|
26 |
grouped_by_has_base_model_info = groupby(has_base_model_info, get_all_models())
|
27 |
-
print(valmap(len, grouped_by_has_base_model_info))
|
28 |
|
29 |
-
|
|
|
|
|
30 |
{len(grouped_by_has_base_model_info.get(False)):,} models don't have base model info.
|
31 |
Currently {round(len(grouped_by_has_base_model_info.get(True))/len(get_all_models())*100,2)}% of models have base model info."""
|
32 |
|
|
|
33 |
models_with_base_model_info = grouped_by_has_base_model_info.get(True)
|
34 |
base_models = [
|
35 |
model.cardData.get("base_model") for model in models_with_base_model_info
|
@@ -38,11 +44,18 @@ df = pd.DataFrame(
|
|
38 |
pd.DataFrame({"base_model": base_models}).value_counts()
|
39 |
).reset_index()
|
40 |
df_with_org = df.copy(deep=True)
|
|
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
def parse_org(hub_id):
|
44 |
parts = hub_id.split("/")
|
45 |
-
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
df_with_org["org"] = df_with_org["base_model"].apply(parse_org)
|
@@ -70,6 +83,41 @@ def return_models_for_base_model(base_model):
|
|
70 |
return results
|
71 |
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
with gr.Blocks() as demo:
|
74 |
gr.Markdown(
|
75 |
"# Base model explorer: explore the lineage of models on the 🤗 Hub"
|
@@ -78,23 +126,24 @@ with gr.Blocks() as demo:
|
|
78 |
"""When sharing models to the Hub it is possible to specify a base model in the model card i.e. that your model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased).
|
79 |
This Space allows you to find children models for a given base model and view the popularity of models for fine-tuning."""
|
80 |
)
|
81 |
-
gr.Markdown(
|
82 |
-
gr.Markdown("
|
83 |
base_model = gr.Dropdown(all_base_models, label="Base Model")
|
84 |
results = gr.Markdown()
|
85 |
base_model.change(return_models_for_base_model, base_model, results)
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
with gr.Accordion("Base model popularity ranking", open=False):
|
87 |
-
gr.DataFrame(
|
|
|
88 |
with gr.Accordion("Base model popularity ranking by organization", open=False):
|
89 |
-
gr.DataFrame(
|
90 |
-
|
91 |
-
|
92 |
-
.sum()
|
93 |
-
.sort_values(ascending=False)
|
94 |
-
.head(50)
|
95 |
-
)
|
96 |
-
.reset_index()
|
97 |
-
.sort_values("count", ascending=False)
|
98 |
)
|
99 |
|
100 |
|
|
|
1 |
from huggingface_hub import list_models
|
2 |
+
from toolz import groupby
|
|
|
3 |
import gradio as gr
|
4 |
from tqdm.auto import tqdm
|
5 |
import pandas as pd
|
6 |
+
from cachetools import cached, TTLCache
|
7 |
+
|
8 |
+
# from diskcache import Cache
|
9 |
+
|
10 |
+
# cache = Cache("cache")
|
11 |
|
12 |
|
13 |
@cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
|
|
|
28 |
|
29 |
|
30 |
grouped_by_has_base_model_info = groupby(has_base_model_info, get_all_models())
|
|
|
31 |
|
32 |
+
|
33 |
+
def produce_summary():
    """Return a three-line text summary of base-model metadata coverage.

    Reads the module-level ``grouped_by_has_base_model_info`` mapping (keys
    ``True`` / ``False``) and the total number of models reported by
    ``get_all_models()``.

    Returns:
        str: The number of models with base model info, the number without,
        and the percentage (rounded to two decimals) that have it.
    """
    # A group is absent from the mapping when no model fell into it, so treat
    # a missing key as an empty group instead of calling len(None).
    with_info = len(grouped_by_has_base_model_info.get(True) or [])
    without_info = len(grouped_by_has_base_model_info.get(False) or [])
    total = len(get_all_models())
    # Guard the percentage against an empty model listing.
    share = round(with_info / total * 100, 2) if total else 0.0
    return f"""{with_info:,} models have base model info.
{without_info:,} models don't have base model info.
Currently {share}% of models have base model info."""
|
37 |
|
38 |
+
|
39 |
models_with_base_model_info = grouped_by_has_base_model_info.get(True)
|
40 |
base_models = [
|
41 |
model.cardData.get("base_model") for model in models_with_base_model_info
|
|
|
44 |
pd.DataFrame({"base_model": base_models}).value_counts()
|
45 |
).reset_index()
|
46 |
df_with_org = df.copy(deep=True)
|
47 |
+
# One pipeline tag (possibly None) per model that has base model info.
# NOTE(review): presumably index-aligned with `base_models`, which is built
# from the same list - confirm that comprehension has no filter clause.
pipeline_tags = [x.pipeline_tag for x in models_with_base_model_info]
# Distinct non-missing tags, sorted so the dropdown choices appear in a stable
# alphabetical order rather than arbitrary set order.
unique_pipeline_tags = sorted({tag for tag in pipeline_tags if tag is not None})
|
51 |
|
52 |
|
53 |
def parse_org(hub_id):
    """Return the organization (namespace) part of a Hub model id.

    Args:
        hub_id: Model identifier such as ``"org/model"`` or ``"model"``.

    Returns:
        The text before the slash for ids with exactly one ``/``;
        ``None`` when that text is ``"."`` (presumably a relative local path
        rather than a Hub id) or when ``hub_id`` is not a string;
        ``"huggingface"`` for every other shape (no slash, or more than one).
    """
    # Missing values (None / NaN) have no ``split``; report "no org" so that
    # callers can drop them instead of crashing.
    if not isinstance(hub_id, str):
        return None
    parts = hub_id.split("/")
    if len(parts) == 2:
        return parts[0] if parts[0] != "." else None
    return "huggingface"
|
59 |
|
60 |
|
61 |
df_with_org["org"] = df_with_org["base_model"].apply(parse_org)
|
|
|
83 |
return results
|
84 |
|
85 |
|
86 |
+
def return_base_model_popularity(pipeline=None):
    """Rank base models by how many models declare them as their base.

    Reads the module-level ``base_models`` and ``pipeline_tags`` lists.

    Args:
        pipeline: Pipeline tag to restrict the ranking to. ``None`` (the
            default) ranks across all models.

    Returns:
        pandas.DataFrame: Columns ``base_model`` and ``count``, most popular
        first, limited to the top 50 rows.
    """
    models = pd.DataFrame({"base_model": base_models, "pipeline": pipeline_tags})
    if pipeline is not None:
        models = models[models["pipeline"] == pipeline]
    # Count per base model only, after filtering. Counting
    # (base_model, pipeline) pairs would list a base model once per pipeline
    # in the unfiltered view and drop models whose pipeline tag is missing.
    ranking = (
        models["base_model"]
        .value_counts()
        .rename_axis("base_model")
        # Name the column explicitly rather than relying on the pandas
        # version's default name for value_counts output.
        .reset_index(name="count")
    )
    return ranking.head(50)
|
99 |
+
|
100 |
+
|
101 |
+
def return_base_model_popularity_by_org(pipeline=None):
    """Rank organizations by how many models are based on their models.

    Reads the module-level ``base_models`` and ``pipeline_tags`` lists and
    derives each base model's organization with ``parse_org``.

    Args:
        pipeline: Pipeline tag to restrict the ranking to. ``None`` (the
            default) ranks across all models.

    Returns:
        pandas.DataFrame: Columns ``org`` and ``count``, largest first,
        limited to the top 50 rows.
    """
    models = pd.DataFrame({"base_model": base_models, "pipeline": pipeline_tags})
    models["org"] = models["base_model"].apply(parse_org)
    # parse_org returns None when no organization can be derived; skip those.
    models = models.dropna(subset=["org"])
    if pipeline is not None:
        models = models[models["pipeline"] == pipeline]
    # One count per organization, which equals summing the per-base-model
    # counts within each organization.
    ranking = (
        models["org"]
        .value_counts()
        .rename_axis("org")
        # Name the column explicitly rather than relying on the pandas
        # version's default name for value_counts output.
        .reset_index(name="count")
    )
    return ranking.head(50)
|
119 |
+
|
120 |
+
|
121 |
with gr.Blocks() as demo:
|
122 |
gr.Markdown(
|
123 |
"# Base model explorer: explore the lineage of models on the 🤗 Hub"
|
|
|
126 |
"""When sharing models to the Hub it is possible to specify a base model in the model card i.e. that your model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased).
|
127 |
This Space allows you to find children models for a given base model and view the popularity of models for fine-tuning."""
|
128 |
)
|
129 |
+
gr.Markdown(produce_summary())
|
130 |
+
gr.Markdown("## Find all models trained from a base model")
|
131 |
base_model = gr.Dropdown(all_base_models, label="Base Model")
|
132 |
results = gr.Markdown()
|
133 |
base_model.change(return_models_for_base_model, base_model, results)
|
134 |
+
gr.Markdown("## Base model rankings ")
|
135 |
+
dropdown = gr.Dropdown(
|
136 |
+
choices=unique_pipeline_tags,
|
137 |
+
value=None,
|
138 |
+
label="Filter rankings by task pipeline",
|
139 |
+
)
|
140 |
with gr.Accordion("Base model popularity ranking", open=False):
|
141 |
+
df_popularity = gr.DataFrame(return_base_model_popularity(None))
|
142 |
+
dropdown.change(return_base_model_popularity, dropdown, df_popularity)
|
143 |
with gr.Accordion("Base model popularity ranking by organization", open=False):
|
144 |
+
df_popularity_org = gr.DataFrame(return_base_model_popularity_by_org(None))
|
145 |
+
dropdown.change(
|
146 |
+
return_base_model_popularity_by_org, dropdown, df_popularity_org
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
)
|
148 |
|
149 |
|