davanstrien HF staff committed on
Commit
02b09bd
·
1 Parent(s): 36ed5fd

add filter option

Browse files
Files changed (1) hide show
  1. app.py +66 -17
app.py CHANGED
@@ -1,9 +1,13 @@
1
  from huggingface_hub import list_models
2
- from cachetools import cached, TTLCache
3
- from toolz import groupby, valmap
4
  import gradio as gr
5
  from tqdm.auto import tqdm
6
  import pandas as pd
 
 
 
 
 
7
 
8
 
9
  @cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
@@ -24,12 +28,14 @@ def has_base_model_info(model):
24
 
25
 
26
  grouped_by_has_base_model_info = groupby(has_base_model_info, get_all_models())
27
- print(valmap(len, grouped_by_has_base_model_info))
28
 
29
- summary = f"""{len(grouped_by_has_base_model_info.get(True)):,} models have base model info.
 
 
30
  {len(grouped_by_has_base_model_info.get(False)):,} models don't have base model info.
31
  Currently {round(len(grouped_by_has_base_model_info.get(True))/len(get_all_models())*100,2)}% of models have base model info."""
32
 
 
33
  models_with_base_model_info = grouped_by_has_base_model_info.get(True)
34
  base_models = [
35
  model.cardData.get("base_model") for model in models_with_base_model_info
@@ -38,11 +44,18 @@ df = pd.DataFrame(
38
  pd.DataFrame({"base_model": base_models}).value_counts()
39
  ).reset_index()
40
  df_with_org = df.copy(deep=True)
 
 
 
 
41
 
42
 
43
  def parse_org(hub_id):
44
  parts = hub_id.split("/")
45
- return parts[0] if len(parts) == 2 else "huggingface"
 
 
 
46
 
47
 
48
  df_with_org["org"] = df_with_org["base_model"].apply(parse_org)
@@ -70,6 +83,41 @@ def return_models_for_base_model(base_model):
70
  return results
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  with gr.Blocks() as demo:
74
  gr.Markdown(
75
  "# Base model explorer: explore the lineage of models on the 🤗 Hub"
@@ -78,23 +126,24 @@ with gr.Blocks() as demo:
78
  """When sharing models to the Hub it is possible to specify a base model in the model card i.e. that your model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased).
79
  This Space allows you to find children models for a given base model and view the popularity of models for fine-tuning."""
80
  )
81
- gr.Markdown(summary)
82
- gr.Markdown("### Find all models trained from a base model")
83
  base_model = gr.Dropdown(all_base_models, label="Base Model")
84
  results = gr.Markdown()
85
  base_model.change(return_models_for_base_model, base_model, results)
 
 
 
 
 
 
86
  with gr.Accordion("Base model popularity ranking", open=False):
87
- gr.DataFrame(df.head(50))
 
88
  with gr.Accordion("Base model popularity ranking by organization", open=False):
89
- gr.DataFrame(
90
- pd.DataFrame(
91
- df_with_org.groupby("org")["count"]
92
- .sum()
93
- .sort_values(ascending=False)
94
- .head(50)
95
- )
96
- .reset_index()
97
- .sort_values("count", ascending=False)
98
  )
99
 
100
 
 
1
  from huggingface_hub import list_models
2
+ from toolz import groupby
 
3
  import gradio as gr
4
  from tqdm.auto import tqdm
5
  import pandas as pd
6
+ from cachetools import cached, TTLCache
7
+
8
+ # from diskcache import Cache
9
+
10
+ # cache = Cache("cache")
11
 
12
 
13
  @cached(TTLCache(maxsize=10, ttl=60 * 60 * 3))
 
28
 
29
 
30
  grouped_by_has_base_model_info = groupby(has_base_model_info, get_all_models())
 
31
 
32
+
33
def produce_summary():
    """Return the text summarising how many models declare a base model.

    Reads the module-level ``grouped_by_has_base_model_info`` mapping (models
    grouped under the keys ``True`` and ``False``) and calls
    ``get_all_models()`` for the overall total.

    Returns:
        str: Three lines giving the number of models with base model info,
        the number without, and the percentage with.
    """
    # ``.get`` yields None when a group is absent (e.g. no model lacks base
    # model info), which would make ``len`` raise; fall back to an empty tuple.
    with_info = len(grouped_by_has_base_model_info.get(True) or ())
    without_info = len(grouped_by_has_base_model_info.get(False) or ())
    total = len(get_all_models())
    # Guard the division so an empty model list reports 0% instead of raising.
    percentage = round(with_info / total * 100, 2) if total else 0.0
    return f"""{with_info:,} models have base model info.
{without_info:,} models don't have base model info.
Currently {percentage}% of models have base model info."""
37
 
38
+
39
  models_with_base_model_info = grouped_by_has_base_model_info.get(True)
40
  base_models = [
41
  model.cardData.get("base_model") for model in models_with_base_model_info
 
44
  pd.DataFrame({"base_model": base_models}).value_counts()
45
  ).reset_index()
46
  df_with_org = df.copy(deep=True)
47
+ pipeline_tags = [x.pipeline_tag for x in models_with_base_model_info]
48
+ unique_pipeline_tags = list(
49
+ {x.pipeline_tag for x in models_with_base_model_info if x.pipeline_tag is not None}
50
+ )
51
 
52
 
53
  def parse_org(hub_id):
54
  parts = hub_id.split("/")
55
+ if len(parts) == 2:
56
+ return parts[0] if parts[0] != '.' else None
57
+ else:
58
+ return "huggingface"
59
 
60
 
61
  df_with_org["org"] = df_with_org["base_model"].apply(parse_org)
 
83
  return results
84
 
85
 
86
def return_base_model_popularity(pipeline=None):
    """Rank base models by how many models list them as their base model.

    Uses the module-level ``base_models`` and ``pipeline_tags`` sequences,
    which are paired element-wise into one row per model.

    Args:
        pipeline: Pipeline tag to restrict the ranking to. ``None`` (the
            default) ranks across all models.

    Returns:
        pandas.DataFrame: Columns ``base_model`` and ``count``, sorted by
        ``count`` in descending order and limited to the first 50 rows.
    """
    models = pd.DataFrame({"base_model": base_models, "pipeline": pipeline_tags})

    # Filter before counting. Counting (base_model, pipeline) pairs would
    # silently drop rows whose pipeline tag is None (value_counts skips NaN
    # rows by default) and would list a base model once per pipeline in the
    # unfiltered view instead of once with its total.
    if pipeline is not None:
        models = models[models["pipeline"] == pipeline]

    ranking = models["base_model"].value_counts()
    # Name both columns explicitly rather than relying on pandas defaults.
    return ranking.rename_axis("base_model").reset_index(name="count").head(50)
99
+
100
+
101
def return_base_model_popularity_by_org(pipeline=None):
    """Rank organisations by how many models build on their base models.

    Uses the module-level ``base_models`` and ``pipeline_tags`` sequences,
    paired element-wise into one row per model, and ``parse_org`` to derive
    the organisation from each base model ID.

    Args:
        pipeline: Pipeline tag to restrict the ranking to. ``None`` (the
            default) ranks across all models.

    Returns:
        pandas.DataFrame: Columns ``org`` and ``count``, sorted by ``count``
        in descending order and limited to the first 50 rows.
    """
    models = pd.DataFrame({"base_model": base_models, "pipeline": pipeline_tags})
    if pipeline is not None:
        models = models[models["pipeline"] == pipeline]

    # Rows for which parse_org returns None are left out of the ranking.
    orgs = models["base_model"].apply(parse_org).dropna()

    # One row per model, so counting organisations directly gives the same
    # totals as counting (base_model, org) pairs and summing them per org,
    # without depending on the auto-generated "count" column name.
    return orgs.value_counts().rename_axis("org").reset_index(name="count").head(50)
119
+
120
+
121
  with gr.Blocks() as demo:
122
  gr.Markdown(
123
  "# Base model explorer: explore the lineage of models on the 🤗 Hub"
 
126
  """When sharing models to the Hub it is possible to specify a base model in the model card i.e. that your model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased).
127
  This Space allows you to find children models for a given base model and view the popularity of models for fine-tuning."""
128
  )
129
+ gr.Markdown(produce_summary())
130
+ gr.Markdown("## Find all models trained from a base model")
131
  base_model = gr.Dropdown(all_base_models, label="Base Model")
132
  results = gr.Markdown()
133
  base_model.change(return_models_for_base_model, base_model, results)
134
+ gr.Markdown("## Base model rankings ")
135
+ dropdown = gr.Dropdown(
136
+ choices=unique_pipeline_tags,
137
+ value=None,
138
+ label="Filter rankings by task pipeline",
139
+ )
140
  with gr.Accordion("Base model popularity ranking", open=False):
141
+ df_popularity = gr.DataFrame(return_base_model_popularity(None))
142
+ dropdown.change(return_base_model_popularity, dropdown, df_popularity)
143
  with gr.Accordion("Base model popularity ranking by organization", open=False):
144
+ df_popularity_org = gr.DataFrame(return_base_model_popularity_by_org(None))
145
+ dropdown.change(
146
+ return_base_model_popularity_by_org, dropdown, df_popularity_org
 
 
 
 
 
 
147
  )
148
 
149