trending feature
Browse files
app.py
CHANGED
|
@@ -77,7 +77,10 @@ def make_leaderboard(orgs, which_one):
|
|
| 77 |
data_rows = []
|
| 78 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
| 79 |
|
|
|
|
|
|
|
| 80 |
for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
|
|
|
|
| 81 |
df = get_models(org, which_one)
|
| 82 |
if len(df) == 0:
|
| 83 |
continue
|
|
@@ -87,6 +90,7 @@ def make_leaderboard(orgs, which_one):
|
|
| 87 |
|
| 88 |
if which_one == "models":
|
| 89 |
open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
|
|
|
|
| 90 |
data_rows.append({
|
| 91 |
"Organization Name": org,
|
| 92 |
"Total Downloads": sum_info["Downloads"],
|
|
@@ -99,9 +103,12 @@ def make_leaderboard(orgs, which_one):
|
|
| 99 |
"Most Downloaded Model": most_info["Most Download"]["id"],
|
| 100 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
| 101 |
"Most Liked Model": most_info["Most Likes"]["id"],
|
| 102 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
|
|
| 103 |
})
|
| 104 |
elif which_one == "datasets":
|
|
|
|
| 105 |
data_rows.append({
|
| 106 |
"Organization Name": org,
|
| 107 |
"Total Downloads": sum_info["Downloads"],
|
|
@@ -112,17 +119,22 @@ def make_leaderboard(orgs, which_one):
|
|
| 112 |
"Most Downloaded Dataset": most_info["Most Download"]["id"],
|
| 113 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
| 114 |
"Most Liked Dataset": most_info["Most Likes"]["id"],
|
| 115 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
|
|
| 116 |
})
|
| 117 |
|
| 118 |
elif which_one == "spaces":
|
|
|
|
| 119 |
data_rows.append({
|
| 120 |
"Organization Name": org,
|
| 121 |
"Total Likes": sum_info["Likes"],
|
| 122 |
"Number of Spaces": num_things,
|
| 123 |
"Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
|
| 124 |
"Most Liked Space": most_info["Most Likes"]["id"],
|
| 125 |
-
"Most Like Count": most_info["Most Likes"]["likes"]
|
|
|
|
|
|
|
| 126 |
})
|
| 127 |
|
| 128 |
leaderboard = pd.DataFrame(data_rows)
|
|
@@ -132,6 +144,9 @@ def make_leaderboard(orgs, which_one):
|
|
| 132 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
| 133 |
return leaderboard
|
| 134 |
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
with open("org_names.txt", "r") as f:
|
| 137 |
org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
|
|
@@ -139,7 +154,6 @@ with open("org_names.txt", "r") as f:
|
|
| 139 |
|
| 140 |
INTRODUCTION_TEXT = f"""
|
| 141 |
🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
| 142 |
-
|
| 143 |
## Available Dataframes:
|
| 144 |
|
| 145 |
- 🏛️ Models
|
|
@@ -155,6 +169,8 @@ INTRODUCTION_TEXT = f"""
|
|
| 155 |
🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
|
| 156 |
|
| 157 |
**🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
|
|
|
|
|
|
|
| 158 |
"""
|
| 159 |
|
| 160 |
def clickable(x, which_one):
|
|
@@ -164,8 +180,10 @@ def clickable(x, which_one):
|
|
| 164 |
else:
|
| 165 |
return "Not Found"
|
| 166 |
else:
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
def models_df_to_clickable(df, columns, which_one):
|
| 170 |
for column in columns:
|
| 171 |
if column == "Organization Name":
|
|
@@ -174,6 +192,34 @@ def models_df_to_clickable(df, columns, which_one):
|
|
| 174 |
df[column] = df[column].apply(lambda x: clickable(x, which_one))
|
| 175 |
return df
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
demo = gr.Blocks()
|
| 178 |
|
| 179 |
with gr.Blocks() as demo:
|
|
@@ -182,28 +228,29 @@ with gr.Blocks() as demo:
|
|
| 182 |
|
| 183 |
with gr.TabItem("🏛️ Models", id=1):
|
| 184 |
|
| 185 |
-
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model"]
|
| 186 |
models_df = make_leaderboard(org_names_in_list, "models")
|
| 187 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
| 188 |
|
| 189 |
-
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count"]
|
| 190 |
-
gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
| 191 |
|
| 192 |
with gr.TabItem("📊 Datasets", id=2):
|
| 193 |
-
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset"]
|
| 194 |
dataset_df = make_leaderboard(org_names_in_list, "datasets")
|
| 195 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
| 196 |
|
| 197 |
-
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count"]
|
| 198 |
-
gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
| 199 |
|
| 200 |
with gr.TabItem("🚀 Spaces", id=3):
|
| 201 |
-
columns_to_convert = ["Organization Name", "Most Liked Space"]
|
| 202 |
|
| 203 |
spaces_df = make_leaderboard(org_names_in_list, "spaces")
|
| 204 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
| 205 |
|
| 206 |
-
headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count"]
|
| 207 |
-
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str"])
|
| 208 |
|
| 209 |
demo.launch()
|
|
|
|
|
|
| 77 |
data_rows = []
|
| 78 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
| 79 |
|
| 80 |
+
trend = get_trending_list(1, which_one)
|
| 81 |
+
|
| 82 |
for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
|
| 83 |
+
rank = get_ranking_trend(trend, org)
|
| 84 |
df = get_models(org, which_one)
|
| 85 |
if len(df) == 0:
|
| 86 |
continue
|
|
|
|
| 90 |
|
| 91 |
if which_one == "models":
|
| 92 |
open_llm_leaderboard_get_org = get_ranking(open_llm_leaderboard, org)
|
| 93 |
+
|
| 94 |
data_rows.append({
|
| 95 |
"Organization Name": org,
|
| 96 |
"Total Downloads": sum_info["Downloads"],
|
|
|
|
| 103 |
"Most Downloaded Model": most_info["Most Download"]["id"],
|
| 104 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
| 105 |
"Most Liked Model": most_info["Most Likes"]["id"],
|
| 106 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
| 107 |
+
"Trending Model": rank['id'],
|
| 108 |
+
"Best Rank at Trending Models": rank['rank']
|
| 109 |
})
|
| 110 |
elif which_one == "datasets":
|
| 111 |
+
|
| 112 |
data_rows.append({
|
| 113 |
"Organization Name": org,
|
| 114 |
"Total Downloads": sum_info["Downloads"],
|
|
|
|
| 119 |
"Most Downloaded Dataset": most_info["Most Download"]["id"],
|
| 120 |
"Most Download Count": most_info["Most Download"]["downloads"],
|
| 121 |
"Most Liked Dataset": most_info["Most Likes"]["id"],
|
| 122 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
| 123 |
+
"Trending Dataset": rank['id'],
|
| 124 |
+
"Best Rank at Trending Datasets": rank['rank']
|
| 125 |
})
|
| 126 |
|
| 127 |
elif which_one == "spaces":
|
| 128 |
+
|
| 129 |
data_rows.append({
|
| 130 |
"Organization Name": org,
|
| 131 |
"Total Likes": sum_info["Likes"],
|
| 132 |
"Number of Spaces": num_things,
|
| 133 |
"Average Likes per Space": int(sum_info["Likes"] / num_things) if num_things != 0 else 0,
|
| 134 |
"Most Liked Space": most_info["Most Likes"]["id"],
|
| 135 |
+
"Most Like Count": most_info["Most Likes"]["likes"],
|
| 136 |
+
"Trending Space": rank['id'],
|
| 137 |
+
"Best Rank at Trending Spaces": rank['rank']
|
| 138 |
})
|
| 139 |
|
| 140 |
leaderboard = pd.DataFrame(data_rows)
|
|
|
|
| 144 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
| 145 |
return leaderboard
|
| 146 |
|
| 147 |
+
"""# Gradio başlasın
|
| 148 |
+
|
| 149 |
+
"""
|
| 150 |
|
| 151 |
with open("org_names.txt", "r") as f:
|
| 152 |
org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
|
|
|
|
| 154 |
|
| 155 |
INTRODUCTION_TEXT = f"""
|
| 156 |
🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
|
|
|
| 157 |
## Available Dataframes:
|
| 158 |
|
| 159 |
- 🏛️ Models
|
|
|
|
| 169 |
🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
|
| 170 |
|
| 171 |
**🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
|
| 172 |
+
*
|
| 173 |
+
*🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
|
| 174 |
"""
|
| 175 |
|
| 176 |
def clickable(x, which_one):
|
|
|
|
| 180 |
else:
|
| 181 |
return "Not Found"
|
| 182 |
else:
|
| 183 |
+
if x != "Not Found":
|
| 184 |
+
return f'<a target="_blank" href="https://huggingface.co/{which_one}/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>'
|
| 185 |
+
return "Not Found"
|
| 186 |
+
|
| 187 |
def models_df_to_clickable(df, columns, which_one):
|
| 188 |
for column in columns:
|
| 189 |
if column == "Organization Name":
|
|
|
|
| 192 |
df[column] = df[column].apply(lambda x: clickable(x, which_one))
|
| 193 |
return df
|
| 194 |
|
| 195 |
+
|
| 196 |
+
def get_trending_list(pages, which_one):
    """Collect the entries from the first ``pages`` pages of a Hub listing.

    Args:
        pages: Number of listing pages to request. Pages are requested with
            ``p=0`` through ``p=pages - 1``; ``0`` performs no request.
        which_one: Listing name. It is interpolated into the request URL and
            is also the key under which the JSON response holds its entries.
            ``"spaces"`` is the only value handled specially.

    Returns:
        A list of dicts in response order. Each dict has ``"id"`` and
        ``"likes"``; unless ``which_one`` is ``"spaces"`` it also has
        ``"downloads"``.

    Raises:
        requests.RequestException: If a request fails, times out, or comes
            back with an HTTP error status.
        KeyError: If the response or one of its entries lacks an expected key.
    """
    # Space entries are stored without a download count.
    include_downloads = which_one != "spaces"

    trending_list = []
    for page in range(pages):
        # Without a timeout a single stalled connection would block the whole
        # app start-up indefinitely.
        response = requests.get(
            f"https://huggingface.co/{which_one}-json?p={page}",
            timeout=30,
        )
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # when the server answers with an error page.
        response.raise_for_status()

        for thing in response.json()[which_one]:
            entry = {"id": thing["id"]}
            if include_downloads:
                entry["downloads"] = thing["downloads"]
            entry["likes"] = thing["likes"]
            trending_list.append(entry)

    return trending_list
|
| 213 |
+
|
| 214 |
+
def get_ranking_trend(json_data, org_name):
    """Find the best-placed entry in ``json_data`` owned by ``org_name``.

    An entry's owner is the part of its ``"id"`` before the first ``/`` (the
    whole id when it contains no slash). The comparison is an exact string
    match.

    Args:
        json_data: Sequence of dicts, each with an ``"id"`` key, ordered from
            best rank to worst.
        org_name: Owner name to look for.

    Returns:
        ``{"id": <entry id>, "rank": <1-based position>}`` for the first
        matching entry, or ``{"id": "Not Found", "rank": "Not Found"}`` when
        no entry matches.
    """
    # Single pass with early exit: this runs once per organization, so avoid
    # rebuilding intermediate lists of every id on each call.
    for rank, item in enumerate(json_data, start=1):
        item_id = item["id"]
        if item_id.split("/")[0] == org_name:
            return {"id": item_id, "rank": rank}
    return {"id": "Not Found", "rank": "Not Found"}
|
| 222 |
+
|
| 223 |
demo = gr.Blocks()
|
| 224 |
|
| 225 |
with gr.Blocks() as demo:
|
|
|
|
| 228 |
|
| 229 |
with gr.TabItem("🏛️ Models", id=1):
|
| 230 |
|
| 231 |
+
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
|
| 232 |
models_df = make_leaderboard(org_names_in_list, "models")
|
| 233 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
| 234 |
|
| 235 |
+
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model", "👑 Best Rank at Trending Models"]
|
| 236 |
+
gr.Dataframe(models_df.head(400), headers=headers, interactive=True, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
|
| 237 |
|
| 238 |
with gr.TabItem("📊 Datasets", id=2):
|
| 239 |
+
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
|
| 240 |
dataset_df = make_leaderboard(org_names_in_list, "datasets")
|
| 241 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
| 242 |
|
| 243 |
+
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset", "👑 Best Rank at Trending Datasets"]
|
| 244 |
+
gr.Dataframe(dataset_df.head(250), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "str", "str", "markdown", "str", "markdown", "str", "markdown", "str"])
|
| 245 |
|
| 246 |
with gr.TabItem("🚀 Spaces", id=3):
|
| 247 |
+
columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
|
| 248 |
|
| 249 |
spaces_df = make_leaderboard(org_names_in_list, "spaces")
|
| 250 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
| 251 |
|
| 252 |
+
headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces"]
|
| 253 |
+
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
| 254 |
|
| 255 |
demo.launch()
|
| 256 |
+
|