|
from huggingface_hub import snapshot_download |
|
import os |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from datetime import datetime |
|
from scipy.ndimage import gaussian_filter1d |
|
|
|
# Runs at import time: mirror the "Weyaxi/followers-leaderboard" dataset repo
# into the local "followers-leaderboard" directory, which is also the default
# `dataset_path` used by the plotting code in this file.
snapshot_download(
    repo_id="Weyaxi/followers-leaderboard",
    repo_type="dataset",
    local_dir="followers-leaderboard",
    max_workers=32,
)
|
|
|
|
|
# Per-process cache of loaded snapshots, keyed by dataset path. Each value is a
# dict mapping a snapshot date to the DataFrame read from that snapshot.
global_dataset_dfs = {}

# Line colours, indexed modulo the list length so any number of authors works.
COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]
|
|
|
|
|
def _parse_snapshot_date(folder_name):
    """Return the ``dd-mm-YYYY`` date that prefixes *folder_name*, or ``None``."""
    # Keep the first three dash-separated fields, then drop whatever follows
    # the first whitespace (presumably a time-of-day suffix on the folder).
    tokens = "-".join(folder_name.split("-")[:3]).split()
    if not tokens:
        # Blank name, e.g. the basename of a path that ends with a separator.
        return None
    try:
        return datetime.strptime(tokens[0], "%d-%m-%Y")
    except ValueError:
        return None


def _load_snapshots(dataset_path):
    """Read every dated ``data.csv`` under *dataset_path* into ``{date: DataFrame}``."""
    snapshots = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        date = _parse_snapshot_date(os.path.basename(root))
        if date is None:
            continue
        try:
            snapshots[date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            # Unreadable snapshot (ValueError also covers pandas' empty-file
            # error): skip it rather than abort the whole load.
            continue
    return snapshots


def _author_series(author, snapshots, smooth_sigma):
    """Return ``(dates, counts)`` for *author* sorted by date, or ``None`` if absent."""
    by_date = {}
    for date, df in snapshots.items():
        # One boolean mask serves as both the membership test and the lookup.
        rows = df.loc[df["Author"] == author, "Number of Followers"]
        if not rows.empty:
            by_date[date] = rows.iloc[0]
    if not by_date:
        return None

    dates, counts = zip(*sorted(by_date.items()))
    counts = np.array(counts, dtype=np.float32)
    # Smoothing needs at least two samples; a falsy sigma (0 or None) disables it.
    if smooth_sigma and smooth_sigma > 0 and len(counts) > 1:
        counts = gaussian_filter1d(counts, sigma=smooth_sigma)
    return dates, counts


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Snapshots are read from every ``data.csv`` found under *dataset_path*
    whose parent folder name starts with a ``dd-mm-YYYY`` date. The loaded
    frames are cached in ``global_dataset_dfs`` so later calls with the same
    path do not re-read the files.

    Args:
        author_names: A single author name or an iterable of names, matched
            against the ``Author`` column of each snapshot.
        dataset_path: Directory that is walked recursively for snapshots.
        smooth_sigma: Standard deviation of the Gaussian filter applied to
            each author's series. ``0`` or ``None`` disables smoothing.

    Returns:
        None. The figure is displayed with ``plt.show()``. If no requested
        author appears in any snapshot, a message is printed and no figure
        is created.
    """
    if isinstance(author_names, str):
        author_names = [author_names]

    if dataset_path not in global_dataset_dfs:
        global_dataset_dfs[dataset_path] = _load_snapshots(dataset_path)
    snapshots = global_dataset_dfs[dataset_path]

    # Gather all series before touching matplotlib so the no-data path does
    # not leave an empty figure open. The original position of each author is
    # kept because it selects the colour/line-style/marker below.
    series = []
    for idx, author in enumerate(author_names):
        found = _author_series(author, snapshots, smooth_sigma)
        if found is not None:
            series.append((idx, author, *found))

    if not series:
        print("No valid data found for any authors.")
        return

    from matplotlib.dates import DateFormatter

    line_styles = ['-', '--', '-.', ':']
    markers = ['o', 's', 'D', '^', 'v']

    fig, ax = plt.subplots(figsize=(14, 8))
    for idx, author, dates, counts in series:
        # Only the first five requested authors get a marker.
        marker = markers[idx] if idx < len(markers) else None
        ax.plot(
            dates,
            counts,
            linestyle=line_styles[idx % len(line_styles)],
            color=COLORS[idx % len(COLORS)],
            marker=marker,
            markersize=7 if marker else 0,
            linewidth=2.5,
            markevery=0.1,
            alpha=0.9,
            label=author,
        )

    plotted_names = ", ".join(author for _, author, _, _ in series)
    ax.set_title(f'Follower Trend Comparison ({plotted_names})', pad=20)
    ax.set_xlabel('Date', labelpad=15)
    ax.set_ylabel('Followers', labelpad=15)
    ax.set_ylim(bottom=0)
    ax.grid(True, alpha=0.25)

    ax.xaxis.set_major_formatter(DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Anchor the legend just outside the right edge of the axes.
    ax.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    fig.tight_layout()
    plt.show()
|
|