File size: 3,268 Bytes
847efe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from huggingface_hub import snapshot_download
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.ndimage import gaussian_filter1d

# Mirror the leaderboard dataset repository into ./followers-leaderboard when
# this module is loaded; that directory is the default `dataset_path` used by
# plot_follower_comparison.
snapshot_download(
    "Weyaxi/followers-leaderboard",
    repo_type="dataset",
    local_dir="followers-leaderboard",
    max_workers=32,
)


# Cache of parsed snapshots: {dataset_path: {snapshot datetime: DataFrame}}.
global_dataset_dfs = {}
# Line colours, cycled by each author's position in the requested list.
COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']


def _snapshot_date(folder_name):
    """Return the leading DD-MM-YYYY date of a snapshot folder name, or None."""
    # Keep the first three '-'-separated pieces, then drop anything after the
    # first whitespace (e.g. a trailing time-of-day component).
    tokens = '-'.join(folder_name.split('-')[:3]).split()
    if not tokens:
        # Empty/blank folder name (e.g. dataset_path ends with a separator).
        return None
    try:
        return datetime.strptime(tokens[0], "%d-%m-%Y")
    except ValueError:
        return None


def _load_snapshots(dataset_path):
    """Read every `data.csv` under *dataset_path* into {snapshot date: DataFrame}."""
    frames = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        date = _snapshot_date(os.path.basename(root))
        if date is None:
            continue
        try:
            frames[date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            # Best effort: an unreadable snapshot is skipped, not fatal.
            continue
    return frames


def _follower_series(frames, author):
    """Return {snapshot date: follower count} for *author* across *frames*."""
    by_date = {}
    for date, df in frames.items():
        # Skip snapshots that lack the expected columns instead of raising
        # KeyError halfway through a plot.
        if 'Author' not in df.columns or 'Number of Followers' not in df.columns:
            continue
        matches = df.loc[df['Author'] == author, 'Number of Followers']
        if not matches.empty:
            # If an author is listed more than once, the first row wins.
            by_date[date] = matches.iloc[0]
    return by_date


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Snapshots are read from every ``data.csv`` found under *dataset_path*;
    the date of a snapshot is taken from the name of the folder containing it
    (leading ``DD-MM-YYYY``). Successfully loaded snapshots are cached in the
    module-level ``global_dataset_dfs`` so later calls skip the disk scan.

    Args:
        author_names: A single author name or an iterable of author names.
        dataset_path: Root directory that holds the snapshot folders.
        smooth_sigma: Standard deviation of the Gaussian filter applied to
            each series. ``0``, a negative value or ``None`` disables
            smoothing; series with a single point are never smoothed.

    Returns:
        None. Shows the figure with ``plt.show()``, or prints a message and
        returns without creating a figure when none of the authors has data.
    """
    if isinstance(author_names, str):
        author_names = [author_names]

    # Load or retrieve cached DataFrames
    dfs = global_dataset_dfs.get(dataset_path)
    if dfs is None:
        dfs = _load_snapshots(dataset_path)
        # An empty result usually means the dataset is missing or not yet
        # downloaded; do not cache it so a later call can pick the data up.
        if dfs:
            global_dataset_dfs[dataset_path] = dfs

    # Collect every series first so no figure is opened when there is
    # nothing to draw. The original index is kept so each author's colour and
    # line style depend only on their position in `author_names`.
    series = []
    for idx, author in enumerate(author_names):
        date_count = _follower_series(dfs, author)
        if date_count:
            series.append((idx, author, date_count))

    if not series:
        print("No valid data found for any authors.")
        return

    # Imported explicitly rather than reached through `plt.matplotlib`, which
    # only works as a side effect of pyplot's own imports.
    from matplotlib.dates import DateFormatter

    plt.figure(figsize=(14, 8))
    ax = plt.gca()
    valid_authors = []

    for idx, author, date_count in series:
        dates, counts = zip(*sorted(date_count.items()))
        counts = np.array(counts, dtype=np.float32)

        # Apply Gaussian smoothing if requested
        if smooth_sigma and smooth_sigma > 0 and len(counts) > 1:
            counts = gaussian_filter1d(counts, sigma=smooth_sigma)

        # Plot styling: only the first five authors get markers.
        color = COLORS[idx % len(COLORS)]
        style = ['-', '--', '-.', ':'][idx % 4]
        marker = ['o', 's', 'D', '^', 'v'][idx % 5] if idx < 5 else None

        ax.plot(dates, counts,
                linestyle=style,
                color=color,
                marker=marker,
                markersize=7 if marker else 0,
                linewidth=2.5,
                markevery=0.1,
                alpha=0.9,
                label=author)
        valid_authors.append(author)

    # Plot configuration
    plt.title(f'Follower Trend Comparison ({", ".join(valid_authors)})', pad=20)
    plt.xlabel('Date', labelpad=15)
    plt.ylabel('Followers', labelpad=15)
    plt.ylim(bottom=0)
    plt.grid(True, alpha=0.25)

    # Date formatting
    ax.xaxis.set_major_formatter(DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Legend and layout (legend sits outside the axes, top right)
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    plt.tight_layout()
    plt.show()