Spaces:
Running
Running
from datetime import datetime, timezone

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from huggingface_hub import HfApi
def format_number(num: float) -> str:
    """Format a number compactly using a K (thousands) or M (millions) suffix.

    Args:
        num: The value to format. May be an int or a float, positive or
            negative.

    Returns:
        ``"<x.x>M"`` when the magnitude is at least one million,
        ``"<x.x>K"`` when it is at least one thousand, otherwise ``str(num)``.
    """
    # Compare on magnitude so negative values get the same suffix as their
    # positive counterparts; the sign is preserved by formatting ``num``.
    magnitude = abs(num)
    if magnitude >= 1e6:
        return f"{num / 1e6:.1f}M"
    if magnitude >= 1e3:
        return f"{num / 1e3:.1f}K"
    return str(num)
def _count_or_zero(record, attribute):
    """Return ``record.<attribute>``, or 0 when it is missing or ``None``."""
    # ``hasattr`` alone lets a present-but-None value through, which would
    # put None into the numeric DataFrame columns built by ``fetch_stats``.
    value = getattr(record, attribute, None)
    return 0 if value is None else value


def fetch_stats():
    """Fetch download/like statistics for DeepSeek models and their derivatives.

    Queries the Hugging Face Hub once per original model, and once per
    (base model, derivative type) pair for derivatives. A failed request is
    reported with ``print`` and skipped, so the result may be partial.

    Returns:
        A tuple ``(original_df, derivative_df)`` of pandas DataFrames.
        ``original_df`` has columns ``model_id``, ``downloads_30d``, ``likes``.
        ``derivative_df`` has columns ``base_model``, ``model_type``,
        ``model_id``, ``downloads_30d``, ``likes``. Either frame is empty
        (but still has its columns) when nothing could be fetched.
    """
    api = HfApi()

    # Fetch original models
    original_models = [
        "deepseek-ai/deepseek-r1",
        "deepseek-ai/deepseek-r1-zero",
        "deepseek-ai/deepseek-r1-distill-llama-70b",
        "deepseek-ai/deepseek-r1-distill-qwen-32b",
        "deepseek-ai/deepseek-r1-distill-qwen-14b",
        "deepseek-ai/deepseek-r1-distill-llama-8b",
        "deepseek-ai/deepseek-r1-distill-qwen-7b",
        "deepseek-ai/deepseek-r1-distill-qwen-1.5b",
    ]
    original_stats = []
    for model_id in original_models:
        try:
            info = api.model_info(model_id)
        except Exception as e:
            # Best-effort: one unreachable model must not abort the refresh.
            print(f"Error fetching {model_id}: {str(e)}")
            continue
        original_stats.append({
            'model_id': model_id,
            'downloads_30d': _count_or_zero(info, 'downloads'),
            'likes': _count_or_zero(info, 'likes'),
        })

    # Fetch derivative models - using the tag format that works
    model_types = ["adapter", "finetune", "merge", "quantized"]
    base_models = [
        "DeepSeek-R1",
        "DeepSeek-R1-Zero",
        "DeepSeek-R1-Distill-Llama-70B",
        "DeepSeek-R1-Distill-Qwen-32B",
        "DeepSeek-R1-Distill-Qwen-14B",
        "DeepSeek-R1-Distill-Llama-8B",
        "DeepSeek-R1-Distill-Qwen-7B",
        "DeepSeek-R1-Distill-Qwen-1.5B",
    ]
    derivative_stats = []
    for base_model in base_models:
        for model_type in model_types:
            try:
                # Materialize the listing inside the ``try`` so a failure
                # part-way through drops this pair entirely instead of
                # recording a partial set of models.
                models = list(api.list_models(
                    filter=f"base_model:{model_type}:deepseek-ai/{base_model}",
                    full=True
                ))
            except Exception as e:
                print(f"Error fetching {model_type} models for {base_model}: {str(e)}")
                continue
            # Add each model to our stats
            for model in models:
                derivative_stats.append({
                    'base_model': f"deepseek-ai/{base_model}",
                    'model_type': model_type,
                    'model_id': model.id,
                    'downloads_30d': _count_or_zero(model, 'downloads'),
                    'likes': _count_or_zero(model, 'likes'),
                })

    # Passing explicit columns keeps the schema stable even when a list is empty.
    original_df = pd.DataFrame(
        original_stats,
        columns=['model_id', 'downloads_30d', 'likes'],
    )
    derivative_df = pd.DataFrame(
        derivative_stats,
        columns=['base_model', 'model_type', 'model_id', 'downloads_30d', 'likes'],
    )
    return original_df, derivative_df
def _build_type_chart(derivative_df):
    """Return a bar chart of summed downloads per derivative model type."""
    if derivative_df.empty:
        # Nothing to plot: return a titled placeholder figure.
        return px.bar(title='No data available')

    # Aggregate per model type: number of models and total downloads.
    type_dist = derivative_df.groupby('model_type').agg({
        'model_id': 'count',
        'downloads_30d': 'sum',
    }).reset_index()
    # Format model types to be more readable
    type_dist['model_type'] = type_dist['model_type'].str.capitalize()

    fig_types = px.bar(
        type_dist,
        x='model_type',
        y='downloads_30d',
        title='Downloads by Model Type',
        labels={
            'downloads_30d': 'Downloads (last 30 days)',
            'model_type': 'Model Type',
        },
        text=type_dist['downloads_30d'].apply(format_number),  # value labels
    )
    # Update layout for better readability
    fig_types.update_traces(textposition='outside')
    fig_types.update_layout(
        uniformtext_minsize=8,
        uniformtext_mode='hide',
        xaxis_tickangle=0,
        yaxis_title="Downloads",
        plot_bgcolor='white',
        bargap=0.3,
    )
    return fig_types


def _build_top_models(derivative_df):
    """Return the ten most-downloaded derivative models as a display table."""
    columns = ['model_id', 'model_type', 'downloads_30d', 'likes']
    if derivative_df.empty:
        return pd.DataFrame(columns=columns)

    # Copy so the formatting below does not write into a slice of the input.
    top_models = derivative_df.nlargest(10, 'downloads_30d')[columns].copy()
    top_models['model_type'] = top_models['model_type'].str.capitalize()
    # Downloads become display strings (e.g. "1.5K") after ranking is done.
    top_models['downloads_30d'] = top_models['downloads_30d'].apply(format_number)
    return top_models


def create_stats_html():
    """Build the summary HTML, the downloads chart, and the top-models table.

    Calls ``fetch_stats()`` and derives all three outputs from its result.

    Returns:
        A tuple ``(summary_html, fig_types, top_models)``:
        ``summary_html`` is an HTML string with total downloads, model
        counts and a last-updated timestamp; ``fig_types`` is a Plotly bar
        chart of downloads per derivative type (a placeholder figure when
        there are no derivatives); ``top_models`` is a DataFrame with columns
        ``model_id``, ``model_type``, ``downloads_30d``, ``likes`` holding at
        most ten rows.
    """
    original_df, derivative_df = fetch_stats()

    # Create summary statistics
    total_originals = len(original_df)
    total_derivatives = len(derivative_df)
    total_downloads_orig = original_df['downloads_30d'].sum()
    total_downloads_deriv = derivative_df['downloads_30d'].sum()

    fig_types = _build_type_chart(derivative_df)
    top_models = _build_top_models(derivative_df)

    # The label says "UTC", so take the time in UTC rather than the server's
    # local timezone.
    last_updated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    # Format the summary statistics
    summary_html = f"""
    <div style='padding: 20px; background-color: #f5f5f5; border-radius: 10px; margin-bottom: 20px;'>
        <h3>Summary Statistics</h3>
        <p>Derivative Models Downloads: {format_number(total_downloads_deriv)} ({total_derivatives} models)</p>
        <p>Original Models Downloads: {format_number(total_downloads_orig)} ({total_originals} models)</p>
        <p>Last Updated: {last_updated}</p>
    </div>
    """
    return summary_html, fig_types, top_models
def create_interface():
    """Assemble the Gradio Blocks page and return it without launching.

    The page has a centered title, a row holding the summary HTML next to
    the downloads plot, and a second row with the top-10 table. All three
    components are filled by ``create_stats_html()`` when the page loads.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    def update_stats():
        # Page-load callback: yields (summary HTML, figure, table) in the
        # same order as the ``outputs`` list registered below.
        return create_stats_html()

    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.HTML("<h1 style='text-align: center;'>DeepSeek Models Stats</h1>")

        # Top row: summary text on the left, chart on the right.
        with gr.Row():
            with gr.Column():
                summary_box = gr.HTML()
            with gr.Column():
                chart = gr.Plot()

        # Bottom row: ranking table.
        with gr.Row():
            top_table = gr.DataFrame(
                headers=["Model ID", "Type", "Downloads (30d)", "Likes"],
                label="Top 10 Most Downloaded Models",
            )

        # Event wiring must happen while the Blocks context is still open.
        interface.load(
            fn=update_stats,
            outputs=[summary_box, chart, top_table],
        )

    return interface
# Build the interface at import time so ``demo`` is available as a
# module-level name, but only start the server when this file is run as a
# script; importing the module no longer launches it as a side effect.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()