import os
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from huggingface_hub import snapshot_download
from scipy.ndimage import gaussian_filter1d

# Download the dataset snapshot into a local folder when this module is run
# or imported; plot_follower_comparison() reads from that folder by default.
snapshot_download(
    "Weyaxi/followers-leaderboard",
    local_dir="followers-leaderboard",
    repo_type="dataset",
    max_workers=32,
)

# Cache of loaded snapshots: dataset path -> {snapshot date: DataFrame}.
global_dataset_dfs = {}

COLORS = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]

# Per-author styling cycles; markers are only used for the first few authors.
_LINE_STYLES = ['-', '--', '-.', ':']
_MARKERS = ['o', 's', 'D', '^', 'v']

_AUTHOR_COLUMN = 'Author'
_FOLLOWERS_COLUMN = 'Number of Followers'


def _parse_snapshot_date(folder_name):
    """Return the ``%d-%m-%Y`` date at the start of *folder_name*, or None.

    Only the first three dash-separated fields are considered, and anything
    after the first whitespace in them is dropped before parsing.
    """
    tokens = '-'.join(folder_name.split('-')[:3]).split()
    if not tokens:
        # Empty or whitespace-only folder name: nothing to parse.
        return None
    try:
        return datetime.strptime(tokens[0], "%d-%m-%Y")
    except ValueError:
        return None


def _load_snapshots(dataset_path):
    """Read every ``data.csv`` under *dataset_path* into a date-keyed dict.

    Folders whose name does not yield a date, and CSV files that pandas
    cannot parse, are skipped. If two folders map to the same date, the one
    visited later by ``os.walk`` wins.
    """
    snapshots = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        date = _parse_snapshot_date(os.path.basename(root))
        if date is None:
            continue
        try:
            snapshots[date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            continue
    return snapshots


def _follower_series(dfs, author):
    """Return ``[(date, followers), ...]`` for *author*, sorted by date.

    Snapshots that lack the expected columns or do not list the author are
    skipped. When an author appears more than once in a snapshot, the first
    row is used.
    """
    history = {}
    for date, df in dfs.items():
        if _AUTHOR_COLUMN not in df.columns or _FOLLOWERS_COLUMN not in df.columns:
            continue
        matches = df.loc[df[_AUTHOR_COLUMN] == author, _FOLLOWERS_COLUMN]
        if not matches.empty:
            history[date] = matches.iloc[0]
    return sorted(history.items())


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Args:
        author_names: A single author name or an iterable of author names.
        dataset_path: Directory that is walked for ``<date-folder>/data.csv``
            snapshots. Loaded snapshots are cached in ``global_dataset_dfs``
            per path, so later calls with the same path do not re-read disk.
        smooth_sigma: Sigma passed to ``gaussian_filter1d``. Smoothing is
            skipped when this is not positive or when an author has only one
            data point. The filter runs over the sequence of snapshots, not
            over calendar time.

    Returns:
        None. Shows the figure, or prints a message and returns without
        creating a figure when none of the authors has any data.
    """
    if isinstance(author_names, str):
        author_names = [author_names]

    # Load or retrieve cached DataFrames.
    if dataset_path not in global_dataset_dfs:
        global_dataset_dfs[dataset_path] = _load_snapshots(dataset_path)
    dfs = global_dataset_dfs[dataset_path]

    # Gather the data before touching pyplot so that the "nothing to plot"
    # path does not leave an empty figure open.
    series = []
    for idx, author in enumerate(author_names):
        points = _follower_series(dfs, author)
        if points:
            # Keep the author's position in the input so styling stays tied
            # to it even when earlier authors have no data.
            series.append((idx, author, points))

    if not series:
        print("No valid data found for any authors.")
        return

    fig, ax = plt.subplots(figsize=(14, 8))

    valid_authors = []
    for idx, author, points in series:
        dates, counts = zip(*points)
        counts = np.array(counts, dtype=np.float32)

        # Apply Gaussian smoothing if requested.
        if smooth_sigma > 0 and len(counts) > 1:
            counts = gaussian_filter1d(counts, sigma=smooth_sigma)

        marker = _MARKERS[idx % len(_MARKERS)] if idx < len(_MARKERS) else None
        ax.plot(
            dates,
            counts,
            linestyle=_LINE_STYLES[idx % len(_LINE_STYLES)],
            color=COLORS[idx % len(COLORS)],
            marker=marker,
            markersize=7 if marker else 0,
            linewidth=2.5,
            markevery=0.1,
            alpha=0.9,
            label=author,
        )
        valid_authors.append(author)

    # Plot configuration.
    ax.set_title(f'Follower Trend Comparison ({", ".join(valid_authors)})', pad=20)
    ax.set_xlabel('Date', labelpad=15)
    ax.set_ylabel('Followers', labelpad=15)
    ax.set_ylim(bottom=0)
    ax.grid(True, alpha=0.25)

    # Date formatting.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Legend sits outside the axes on the right; tight_layout makes room.
    ax.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    fig.tight_layout()
    plt.show()