# model-usage / app.py
# nazneen's picture
# plot details
# 7fec6b3
# raw
# history blame
# 3.2 kB
## LIBRARIES ###
from cProfile import label
from tkinter import font
from turtle import width
import streamlit as st
import pandas as pd
from datetime import datetime
import plotly.express as px
def read_file_to_df(file):
    """Load a CSV into a pandas DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path string or
            file-like object).

    Returns:
        The parsed ``pandas.DataFrame``, using ``read_csv`` defaults.
    """
    frame = pd.read_csv(file)
    return frame
def date_range(df):
    """Turn the ISO-style timestamps in ``df.index`` into "month/day" labels.

    Every index entry must be a string shaped like
    ``2021-06-07T00:00:00.000Z`` (format ``%Y-%m-%dT%H:%M:%S.%fZ``).

    Args:
        df: DataFrame whose index holds the timestamp strings.

    Returns:
        A list of strings such as ``"6/7"`` (month and day without zero
        padding), in index order. Empty index -> empty list.

    Raises:
        ValueError: If an index entry does not match the expected format
            (raised by ``datetime.strptime``).
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    # Parse each timestamp exactly once; month and day are read from the
    # same parsed value instead of re-running strptime for each field.
    parsed_stamps = (
        datetime.strptime(raw, stamp_format) for raw in df.index.to_list()
    )
    return [f"{stamp.month}/{stamp.day}" for stamp in parsed_stamps]
# Mean-usage bands per popularity level, one table per model cohort.
# Each entry is (lower bound, upper bound or None, upper bound inclusive?).
# The lower bound is always inclusive.
# NOTE(review): adjacent bands share boundary values (5000 and 40000 for 2021,
# 1500 for 2022), so a model whose mean sits exactly on one of them shows up
# in both neighbouring levels; only 2022's Low band is exclusive at its top.
# Kept exactly as the original thresholds -- confirm whether this is intended.
_BANDS_2021 = {
    'Low': (3500, 5000, True),
    'Moderate': (5000, 40000, True),
    'High': (40000, None, True),
}
_BANDS_2022 = {
    'Low': (300, 500, False),
    'Moderate': (500, 1500, True),
    'High': (1500, None, True),
}


def _load_usage_table(csv_path):
    """Read a usage CSV and return it with one column per model.

    Missing values are replaced by 0, the 'Model' column becomes the column
    header after transposing, and the resulting row labels are rewritten by
    ``date_range``.
    """
    usage = read_file_to_df(csv_path)
    usage.fillna(0, inplace=True)
    table = usage.set_index('Model').T
    table.index = date_range(table)
    return table


def _select_by_mean_usage(table, lower, upper=None, upper_inclusive=True):
    """Keep the columns whose column-wise mean falls inside the given band."""
    # Computed once and reused for both bounds.
    mean_usage = table.mean(axis=0)
    keep = mean_usage >= lower
    if upper is not None:
        under_cap = mean_usage <= upper if upper_inclusive else mean_usage < upper
        keep = under_cap & keep
    return table[table.columns[keep]]


def _render_usage_plot(csv_path, bands, popularity, title):
    """Load one cohort, filter it by popularity band and draw its line chart."""
    with st.container():
        table = _load_usage_table(csv_path)
        # Anything other than 'Low' / 'Moderate' is treated as 'High'.
        band = bands.get(popularity, bands['High'])
        selected = _select_by_mean_usage(table, *band)
        fig = px.line(selected, title=title, labels={"index": "Weeks", "value": "Usage", "variable": "Model"})
        st.plotly_chart(fig, use_container_width=True)


if __name__ == "__main__":
    ### STREAMLIT APP CONFIG ###
    st.set_page_config(layout="wide", page_title="HF Hub Model Usage Visualization")
    st.header("Model Usage Visualization")

    with st.expander("How to read and interact with the plot:"):
        st.markdown("The plots below visualize model usage for HF models created in mid 2021 (top) vs. models created in mid 2022 (bottom). Note the y-axis range is different for each plot.")
        st.markdown("The plots are categorized based on model popularity. I first created a histogram of weekly mean usage across all models and then grouped them into these categorizes so that the plots are easier to read.")
        st.markdown("The plots are interactive. Hover over the points to see the model name and the number of weekly mean usage. Click on the legend to hide/show the models.")

    # 'High' (index 2) is the option selected by default.
    popularity = st.radio("Model popularity", ('Low', 'Moderate', 'High'), key = "popularity", index=2, horizontal = True)

    # Same load -> filter -> plot steps for both cohorts; only the data file,
    # the thresholds and the chart title differ.
    _render_usage_plot("./assets/2021/model_init_time.csv", _BANDS_2021, popularity, "Models created in 2021")
    _render_usage_plot("./assets/2022/model_init_time.csv", _BANDS_2022, popularity, "Models created in 2022")