# ModelHubManager / pages/analytics.py
# Source listing metadata: uploaded by S-Dreamer ("Upload 31 files"), commit 74dd3f1 (verified).
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import random # Only used for demo data, remove in production
def generate_demo_data(model_ids, days=30, seed=None):
    """Generate synthetic daily download/like statistics for demo charts.

    Each model gets a random per-day baseline (10-1000 downloads, 5-200
    likes). Every day's volume is that baseline scaled by a mild upward
    trend (up to +10% for downloads, +5% for likes across the period) and
    random noise. Cumulative totals start at the baseline and grow by the
    daily volume.

    Args:
        model_ids: Iterable of model identifiers to generate rows for.
        days: Number of consecutive days to generate, ending today.
        seed: Optional seed for reproducible output. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        A ``pandas.DataFrame`` with one row per model per day and the
        columns ``model_id``, ``date`` (``YYYY-MM-DD`` string),
        ``downloads``, ``likes`` (cumulative), ``daily_downloads`` and
        ``daily_likes``. The columns are present even when no rows are
        generated.
    """
    columns = [
        "model_id",
        "date",
        "downloads",
        "likes",
        "daily_downloads",
        "daily_likes",
    ]
    # Fall back to the shared module generator when no seed is given so a
    # caller's global ``random.seed(...)`` keeps working.
    rng = random if seed is None else random.Random(seed)
    today = datetime.now()
    rows = []

    for model_id in model_ids:
        # The daily baseline must stay fixed: deriving each day's volume
        # from the running total would make the totals grow exponentially.
        download_rate = rng.randint(10, 1000)
        like_rate = rng.randint(5, 200)
        total_downloads = download_rate
        total_likes = like_rate

        for i in range(days):
            date = (today - timedelta(days=days - i - 1)).strftime("%Y-%m-%d")
            daily_downloads = max(
                1, int(download_rate * (1 + 0.1 * i / days) * rng.uniform(0.8, 1.2))
            )
            daily_likes = max(
                0, int(like_rate * (1 + 0.05 * i / days) * rng.uniform(0.7, 1.3))
            )
            total_downloads += daily_downloads
            total_likes += daily_likes
            rows.append({
                "model_id": model_id,
                "date": date,
                "downloads": total_downloads,
                "likes": total_likes,
                "daily_downloads": daily_downloads,
                "daily_likes": daily_likes,
            })

    # Explicit columns keep the schema stable for empty results, so callers
    # can still index df["model_id"] without a KeyError.
    return pd.DataFrame(rows, columns=columns)
def _growth_percent(first, last):
    """Return the percentage change from ``first`` to ``last``.

    A non-positive starting value has no meaningful relative change, so it
    is reported as 100 when ``last`` is positive and 0 otherwise.
    """
    if first > 0:
        return (last - first) / first * 100
    return 100 if last > 0 else 0


def _summarize_growth(df, model_ids):
    """Build one growth-summary row per model that has at least two rows.

    Growth compares the first and last row of each model in ``df``, so the
    rows are expected to be in chronological order. The returned DataFrame
    is empty when no model has enough data.
    """
    rows = []
    for model in model_ids:
        model_data = df[df["model_id"] == model]
        if len(model_data) < 2:
            continue
        first_day = model_data.iloc[0]
        last_day = model_data.iloc[-1]
        rows.append({
            "model_id": model,
            "download_growth": _growth_percent(first_day["downloads"], last_day["downloads"]),
            "like_growth": _growth_percent(first_day["likes"], last_day["likes"]),
            "downloads": last_day["downloads"],
            "likes": last_day["likes"],
        })
    return pd.DataFrame(rows)


def _render_metric_tab(df, metric, label):
    """Render the cumulative line chart and daily bar chart for one metric.

    ``metric`` is the cumulative column name (e.g. ``"downloads"``); the
    daily column is ``"daily_" + metric``. ``label`` is the capitalised
    display name used in titles and axis labels.
    """
    daily_metric = f"daily_{metric}"
    st.subheader(f"{label} Over Time")

    fig_cumulative = px.line(
        df,
        x="date",
        y=metric,
        color="model_id",
        title=f"Cumulative {label}",
        labels={metric: f"Total {label}", "date": "Date", "model_id": "Model"},
    )
    st.plotly_chart(fig_cumulative, use_container_width=True)

    fig_daily = px.bar(
        df,
        x="date",
        y=daily_metric,
        color="model_id",
        title=f"Daily {label}",
        labels={daily_metric: f"Daily {label}", "date": "Date", "model_id": "Model"},
        barmode="group",
    )
    st.plotly_chart(fig_daily, use_container_width=True)


def render_analytics_page():
    """Render the model analytics page.

    Reads the model list from ``st.session_state.models`` (each entry must
    expose a ``modelId`` attribute), generates demo statistics for the
    chosen time period, and shows four tabs (downloads, likes, growth rate,
    comparison) followed by a "Key Insights" summary. Returns early with a
    notice when there are no models or none are selected.
    """
    st.title("📊 Model Performance Analytics")

    if "models" not in st.session_state or not st.session_state.models:
        st.info("No models found. Please create or import models first.")
        return

    # Get model IDs from the session state
    model_ids = [model.modelId for model in st.session_state.models]

    # Time period selection
    days_mapping = {
        "Last 7 days": 7,
        "Last 30 days": 30,
        "Last 90 days": 90,
        "All time": 180,  # Default to 6 months for demo
    }
    time_period = st.selectbox(
        "Select Time Period",
        list(days_mapping),
        index=1,
    )
    days = days_mapping[time_period]

    # In a real implementation, we would fetch this data from the Hugging Face API
    # For now, generate demo data
    df = generate_demo_data(model_ids, days)

    # Model selection for detailed view
    selected_models = st.multiselect(
        "Select Models to Compare",
        model_ids,
        default=model_ids[:3],
    )
    if not selected_models:
        st.warning("Please select at least one model to display analytics.")
        return

    # Filter data for selected models
    filtered_df = df[df["model_id"].isin(selected_models)]

    # Computed once and shared by the Growth tab and the Key Insights
    # section so both report the same numbers.
    growth_df = _summarize_growth(filtered_df, selected_models)

    # Create tabs for different analytics views
    tab1, tab2, tab3, tab4 = st.tabs(["Downloads", "Likes", "Growth Rate", "Comparison"])

    with tab1:
        _render_metric_tab(filtered_df, "downloads", "Downloads")

    with tab2:
        _render_metric_tab(filtered_df, "likes", "Likes")

    with tab3:
        st.subheader("Growth Metrics")
        if not growth_df.empty:
            col1, col2 = st.columns(2)
            with col1:
                fig = px.bar(
                    growth_df,
                    x="model_id",
                    y="download_growth",
                    title="Download Growth Rate (%)",
                    labels={"download_growth": "Growth (%)", "model_id": "Model"},
                    color="download_growth",
                    color_continuous_scale=px.colors.sequential.Blues,
                )
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                fig = px.bar(
                    growth_df,
                    x="model_id",
                    y="like_growth",
                    title="Like Growth Rate (%)",
                    labels={"like_growth": "Growth (%)", "model_id": "Model"},
                    color="like_growth",
                    color_continuous_scale=px.colors.sequential.Reds,
                )
                st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("Not enough data to calculate growth rates.")

    with tab4:
        st.subheader("Model Comparison")

        # Get the most recent data point for each model (relies on rows
        # being in chronological order within each model).
        latest_data = filtered_df.groupby("model_id").last().reset_index()

        # Create a radar chart for model comparison
        categories = ["downloads", "likes", "daily_downloads", "daily_likes"]
        axis_labels = ["Total Downloads", "Total Likes", "Daily Downloads", "Daily Likes"]
        # Per-category maxima are the same for every model, so compute once.
        max_vals = latest_data[categories].max()

        fig = go.Figure()
        for _, model_row in latest_data.iterrows():
            # Normalize values for radar chart (0-1 scale)
            normalized_vals = [
                model_row[cat] / max_vals[cat] if max_vals[cat] > 0 else 0
                for cat in categories
            ]
            fig.add_trace(go.Scatterpolar(
                r=normalized_vals,
                theta=axis_labels,
                fill="toself",
                name=model_row["model_id"],
            ))
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            showlegend=True,
        )
        st.plotly_chart(fig, use_container_width=True)

        # Comparison table
        st.subheader("Numeric Comparison")
        comparison_df = latest_data[["model_id"] + categories].rename(
            columns={"model_id": "Model", **dict(zip(categories, axis_labels))}
        )
        st.dataframe(comparison_df, use_container_width=True)

    # Analytics insights
    st.subheader("📈 Key Insights")
    if not filtered_df.empty:
        # A global idxmax yields the single best row; a per-group idxmax
        # would return one row per model and print whole Series.
        most_downloaded = filtered_df.loc[filtered_df["downloads"].idxmax()]
        most_liked = filtered_df.loc[filtered_df["likes"].idxmax()]

        col1, col2 = st.columns(2)
        with col1:
            st.info(f"💡 Most downloaded model: **{most_downloaded['model_id']}** with **{most_downloaded['downloads']}** total downloads")
            if not growth_df.empty:
                fastest_growing = growth_df.loc[growth_df["download_growth"].idxmax()]
                st.info(f"💡 Fastest growing model: **{fastest_growing['model_id']}** with a growth rate of **{fastest_growing['download_growth']:.2f}%**")
        with col2:
            st.info(f"💡 Most liked model: **{most_liked['model_id']}** with **{most_liked['likes']}** total likes")
            # Average daily downloads
            avg_daily = filtered_df.groupby("model_id")["daily_downloads"].mean().reset_index()
            highest_avg = avg_daily.loc[avg_daily["daily_downloads"].idxmax()]
            st.info(f"💡 Highest avg daily downloads: **{highest_avg['model_id']}** with **{highest_avg['daily_downloads']:.1f}** downloads/day")