Weyaxi committed on
Commit
847efe3
·
verified ·
1 Parent(s): b7a5517

add plot graph code

Browse files
Files changed (1) hide show
  1. plot_graph.py +93 -0
plot_graph.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import snapshot_download
2
+ import os
3
+ import numpy as np
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from datetime import datetime
7
+ from scipy.ndimage import gaussian_filter1d
8
+
9
# Fetch the leaderboard dataset repository into ./followers-leaderboard.
# NOTE: this runs at import time, before anything else in the module.
snapshot_download(
    repo_id="Weyaxi/followers-leaderboard",
    repo_type="dataset",
    local_dir="followers-leaderboard",
    max_workers=32,
)


# Per-process cache filled by plot_follower_comparison:
# dataset path -> {snapshot date -> DataFrame read from that snapshot's data.csv}
global_dataset_dfs = dict()

# Line colours, selected by author index modulo the palette length.
COLORS = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]
15
+
16
+
17
def _parse_snapshot_date(folder_name):
    """Return the date encoded at the start of *folder_name*, or None.

    The first three '-'-separated fields are rejoined and anything after the
    first whitespace is dropped, then the result is parsed as ``%d-%m-%Y``.
    Names that are empty or do not parse yield None instead of raising.
    """
    tokens = '-'.join(folder_name.split('-')[:3]).split()
    if not tokens:
        # Empty basename (e.g. a dataset path with a trailing separator).
        return None
    try:
        return datetime.strptime(tokens[0], "%d-%m-%Y")
    except ValueError:
        return None


def _load_snapshots(dataset_path):
    """Read every ``data.csv`` below *dataset_path* into a {date: DataFrame} dict.

    Folders whose name does not start with a parsable date, and CSV files that
    pandas cannot parse, are skipped.
    """
    snapshots = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        date = _parse_snapshot_date(os.path.basename(root))
        if date is None:
            continue
        try:
            snapshots[date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            continue
    return snapshots


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Args:
        author_names: A single author name or an iterable of names. Each name
            is matched against the ``Author`` column of every snapshot.
        dataset_path: Directory tree holding dated snapshot folders, each with
            a ``data.csv``. Loaded snapshots are cached per path in
            ``global_dataset_dfs`` so later calls do not re-read the files.
        smooth_sigma: Standard deviation of the Gaussian filter applied to each
            author's series. ``0``, a negative value or ``None`` disables
            smoothing; series with a single point are never smoothed.

    Returns:
        None. The figure is shown with ``plt.show()``. If no requested author
        appears in any snapshot, a message is printed and no figure is created.
    """
    # Imported explicitly rather than reached through ``plt.matplotlib.dates``,
    # which only works if something else already imported the submodule.
    import matplotlib.dates as mdates

    if isinstance(author_names, str):
        author_names = [author_names]

    # Load or retrieve cached DataFrames
    if dataset_path not in global_dataset_dfs:
        global_dataset_dfs[dataset_path] = _load_snapshots(dataset_path)
    dfs = global_dataset_dfs[dataset_path]

    # Collect every series first so that a figure is only created when there
    # is something to draw (an early return must not leave an empty figure
    # open for the next plt.show()).
    series = []
    for idx, author in enumerate(author_names):
        date_count = {}
        for date, df in dfs.items():
            # Single pass over the column; the first matching row wins.
            matches = df.loc[df['Author'] == author, 'Number of Followers']
            if not matches.empty:
                date_count[date] = matches.iloc[0]

        if not date_count:
            continue

        dates, counts = zip(*sorted(date_count.items()))
        counts = np.array(counts, dtype=np.float32)

        # Apply Gaussian smoothing if requested
        if smooth_sigma and smooth_sigma > 0 and len(counts) > 1:
            counts = gaussian_filter1d(counts, sigma=smooth_sigma)

        # Keep the original position so styling stays tied to the input order.
        series.append((idx, author, dates, counts))

    if not series:
        print("No valid data found for any authors.")
        return

    plt.figure(figsize=(14, 8))
    ax = plt.gca()

    for idx, author, dates, counts in series:
        # Plot styling: colour, line style and marker all cycle with the
        # author's index; only the first five authors get markers.
        color = COLORS[idx % len(COLORS)]
        style = ['-', '--', '-.', ':'][idx % 4]
        marker = ['o', 's', 'D', '^', 'v'][idx % 5] if idx < 5 else None

        ax.plot(dates, counts,
                linestyle=style,
                color=color,
                marker=marker,
                markersize=7 if marker else 0,
                linewidth=2.5,
                markevery=0.1,
                alpha=0.9,
                label=author)

    valid_authors = [author for _, author, _, _ in series]

    # Plot configuration
    plt.title(f'Follower Trend Comparison ({", ".join(valid_authors)})', pad=20)
    plt.xlabel('Date', labelpad=15)
    plt.ylabel('Followers', labelpad=15)
    plt.ylim(bottom=0)
    plt.grid(True, alpha=0.25)

    # Date formatting
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Legend and layout: the legend sits outside the axes on the right.
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    plt.tight_layout()
    plt.show()
93
+