from huggingface_hub import snapshot_download
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.ndimage import gaussian_filter1d
snapshot_download("Weyaxi/followers-leaderboard", local_dir="followers-leaderboard", repo_type="dataset", max_workers=32)

# Cache of parsed snapshot CSVs keyed by dataset path, so repeated calls skip the disk walk.
global_dataset_dfs = {}


def gr_plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Build a long-format DataFrame (x=date, y=follower count, author) for one or more authors."""
    if isinstance(author_names, str):
        author_names = [author_names]

    # Load or retrieve cached DataFrames
    if dataset_path not in global_dataset_dfs:
        dfs = {}
        for root, _, files in os.walk(dataset_path):
            if "data.csv" in files:
                file_path = os.path.join(root, "data.csv")
                folder_name = os.path.basename(root)
                # Folder names begin with a dd-mm-YYYY date; keep only that part.
                date_str = '-'.join(folder_name.split('-')[:3]).split()[0]
                try:
                    date = datetime.strptime(date_str, "%d-%m-%Y")
                    df = pd.read_csv(file_path)
                    dfs[date] = df
                except (ValueError, pd.errors.ParserError):
                    continue
        global_dataset_dfs[dataset_path] = dfs
    else:
        dfs = global_dataset_dfs[dataset_path]

    data = []
    for author in author_names:
        # Collect the follower count for this author on every snapshot date.
        date_count = {}
        for date, df in dfs.items():
            if author in df['Author'].values:
                count = df.loc[df['Author'] == author, 'Number of Followers'].iloc[0]
                date_count[date] = count

        if date_count:
            dates, counts = zip(*sorted(date_count.items()))
            counts = np.array(counts, dtype=np.float32)
            # Apply Gaussian smoothing if requested
            if smooth_sigma > 0 and len(counts) > 1:
                counts = gaussian_filter1d(counts, sigma=smooth_sigma)
            for d, c in zip(dates, counts):
                data.append({"x": d, "y": c, "author": author})

    if not data:
        return pd.DataFrame(columns=["x", "y", "author"])
    return pd.DataFrame(data)
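

# Example usage (a minimal sketch): the author names below are placeholders, and the
# matplotlib plot is just one way to consume the returned long-format frame; in a Gradio
# app the same DataFrame could feed gr.LinePlot(x="x", y="y", color="author").
if __name__ == "__main__":
    example_df = gr_plot_follower_comparison(["Weyaxi", "TheBloke"], smooth_sigma=1.5)
    if not example_df.empty:
        for author, group in example_df.groupby("author"):
            plt.plot(group["x"], group["y"], label=author)
        plt.xlabel("Date")
        plt.ylabel("Number of Followers")
        plt.legend()
        plt.savefig("follower_comparison.png")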