Spaces:
Sleeping
Sleeping
import html
from urllib.parse import quote

import numpy as np
import pandas as pd
import streamlit as st
# Page chrome: wide layout, the leaderboard title, and a data attribution line.
st.set_page_config(layout="wide")
# NOTE(review): "π€" looks like a mis-decoded emoji (mojibake); the intended
# glyph cannot be determined from this file, so the text is left untouched.
st.header("HuggingFace π€ Posts leaderboard")
st.write("Data Source: https://huggingface.co/datasets/maxiw/hf-posts")
@st.cache_data(ttl=3600)
def load_posts() -> pd.DataFrame:
    """Download the posts dataset and parse its ``publishedAt`` timestamps.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching each slider or select-box change would download and
    parse the JSONL file again. The cached result expires after one hour so
    new posts are still picked up. Requires a Streamlit release that provides
    ``st.cache_data``.

    Returns:
        One row per post, with ``publishedAt`` converted to datetimes.
    """
    posts = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)
    posts["publishedAt"] = pd.to_datetime(posts["publishedAt"])
    return posts


df = load_posts()

# Columns that are summed per author and offered as sort keys; the first
# entry is the default selection of the "Sort by:" box.
metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

# Bounds for the date slider, converted to plain ``datetime`` objects.
min_date = df["publishedAt"].min().to_pydatetime()
max_date = df["publishedAt"].max().to_pydatetime()
# Two controls side by side: a wide date slider and a narrow sort selector
# (3:1 width ratio).
col1, col2 = st.columns([3, 1])

# Both slider handles start at the extremes, so the initial view covers the
# full span between the earliest and latest post.
date_range = col1.slider(
    "Select Date Range",
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
    format="DD/MMM/YYYY",
)

# Metric used to order the leaderboard; defaults to the first entry.
selected_metric = col2.selectbox("Sort by:", options=metrics, index=0)
# Keep only the posts published inside the selected date range (both ends
# inclusive). ``.copy()`` makes the result an independent frame, so the column
# assignments below do not write into a slice of the loaded data (which is
# what triggers pandas' SettingWithCopyWarning).
start_date, end_date = date_range
df = df[df["publishedAt"].between(start_date, end_date)].copy()

# Flatten the nested author / reactions fields into plain columns.
df["Name"] = df["author"].apply(lambda author: author["fullname"])
df["username"] = df["author"].apply(lambda author: author["name"])
df["totalReactions"] = df["reactions"].apply(
    lambda reactions: sum(reaction["count"] for reaction in reactions)
)
# Constant 1 per row: summing it per author yields that author's post count.
df["Num of posts"] = 1

# One row per author with every metric summed, best value first.
data = (
    df.groupby(["username", "Name"])[metrics]
    .sum()
    .sort_values(selected_metric, ascending=False)
    .reset_index()
)

# Replace the default 0-based index with a 1-based rank.
data.index = np.arange(1, len(data) + 1)
data.index.name = "Rank"
def make_clickable(val): | |
return f'<a target="_blank" href="https://huggingface.co/{val}">{val}</a>' | |
# Render usernames as profile links. The display name is free text from the
# dataset and the table is injected as raw HTML below, so it is escaped here
# to keep it from being interpreted as markup.
df_styled = data.style.format(
    {
        "username": make_clickable,
        "Name": lambda name: html.escape(str(name)),
    }
)

# NOTE(review): Styler.to_html appears to forward unrecognised keyword
# arguments to its template, so `escape` and `index` look like no-ops here
# (the Rank index would still be rendered) - confirm before relying on them.
st.write(
    f"""<center>{df_styled.to_html(escape=False, index=False)}</center>""",
    unsafe_allow_html=True,
)