add plot graph code
Browse files- plot_graph.py +93 -0
plot_graph.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import snapshot_download
|
2 |
+
import os
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
from datetime import datetime
|
7 |
+
from scipy.ndimage import gaussian_filter1d
|
8 |
+
|
9 |
+
# Fetch the leaderboard dataset into ./followers-leaderboard at import time so
# that plot_follower_comparison() finds it under its default dataset_path.
snapshot_download(
    "Weyaxi/followers-leaderboard",
    repo_type="dataset",
    local_dir="followers-leaderboard",
    max_workers=32,
)
|
10 |
+
|
11 |
+
|
12 |
+
# Cache of parsed snapshots: {dataset_path: {snapshot_date: DataFrame}}.
global_dataset_dfs = {}

# Line colours, cycled through by the author's position in the request.
COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

_LINE_STYLES = ['-', '--', '-.', ':']
_MARKERS = ['o', 's', 'D', '^', 'v']


def _parse_snapshot_date(folder_name):
    """Return the ``DD-MM-YYYY`` date that starts *folder_name*, or ``None``."""
    # Keep the first three dash-separated fields, then drop anything that
    # follows whitespace (e.g. a time-of-day suffix).
    tokens = '-'.join(folder_name.split('-')[:3]).split()
    if not tokens:
        return None
    try:
        return datetime.strptime(tokens[0], "%d-%m-%Y")
    except ValueError:
        return None


def _load_snapshots(dataset_path):
    """Read every ``data.csv`` under *dataset_path* into ``{date: DataFrame}``."""
    snapshots = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        date = _parse_snapshot_date(os.path.basename(root))
        if date is None:
            continue
        try:
            snapshots[date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            # Best effort: an unreadable snapshot is skipped, not fatal.
            continue
    return snapshots


def _follower_series(dfs, author):
    """Return ``(dates, counts)`` for *author*, sorted by date.

    ``dates`` is an empty tuple when the author appears in no snapshot.
    """
    date_count = {}
    for date, df in dfs.items():
        if 'Author' not in df.columns or 'Number of Followers' not in df.columns:
            continue
        matches = df.loc[df['Author'] == author, 'Number of Followers']
        if not matches.empty:
            # Only the first matching row of a snapshot is used.
            date_count[date] = matches.iloc[0]

    if not date_count:
        return (), np.array([], dtype=np.float32)

    dates, counts = zip(*sorted(date_count.items()))
    return dates, np.array(counts, dtype=np.float32)


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Snapshots are read from ``data.csv`` files found anywhere under
    *dataset_path*; the containing folder's name must start with a
    ``DD-MM-YYYY`` date. Parsed snapshots are cached in ``global_dataset_dfs``
    so later calls for the same path skip the disk scan.

    Args:
        author_names: A single author name or an iterable of names, matched
            against the ``Author`` column.
        dataset_path: Root directory of the downloaded dataset.
        smooth_sigma: Standard deviation of the Gaussian filter applied to
            each series; ``0``, a negative value or ``None`` disables smoothing.

    Returns:
        None. The figure is shown with ``plt.show()``. If none of the authors
        appears in any snapshot, a message is printed and no figure is created.
    """
    if isinstance(author_names, str):
        author_names = [author_names]

    dfs = global_dataset_dfs.get(dataset_path)
    if dfs is None:
        dfs = _load_snapshots(dataset_path)
        # Do not cache an empty result: the dataset may simply not have been
        # downloaded yet, and a later call should look at the disk again.
        if dfs:
            global_dataset_dfs[dataset_path] = dfs

    # Collect every series before touching matplotlib so that the "nothing to
    # plot" path does not leave an empty figure open.
    series = []
    for idx, author in enumerate(author_names):
        dates, counts = _follower_series(dfs, author)
        if not dates:
            continue
        if smooth_sigma and smooth_sigma > 0 and len(counts) > 1:
            counts = gaussian_filter1d(counts, sigma=smooth_sigma)
        series.append((idx, author, dates, counts))

    if not series:
        print("No valid data found for any authors.")
        return

    # Imported explicitly rather than reached through ``plt.matplotlib.dates``,
    # which only works when the submodule happens to be loaded already.
    from matplotlib.dates import DateFormatter

    plt.figure(figsize=(14, 8))
    ax = plt.gca()

    for idx, author, dates, counts in series:
        # Styling follows the author's position in the request, so an author
        # keeps the same colour even when earlier authors had no data.
        marker = _MARKERS[idx] if idx < len(_MARKERS) else None
        ax.plot(dates, counts,
                linestyle=_LINE_STYLES[idx % len(_LINE_STYLES)],
                color=COLORS[idx % len(COLORS)],
                marker=marker,
                markersize=7 if marker else 0,
                linewidth=2.5,
                markevery=0.1,
                alpha=0.9,
                label=author)

    valid_authors = [author for _, author, _, _ in series]

    # Plot configuration
    plt.title(f'Follower Trend Comparison ({", ".join(valid_authors)})', pad=20)
    plt.xlabel('Date', labelpad=15)
    plt.ylabel('Followers', labelpad=15)
    plt.ylim(bottom=0)
    plt.grid(True, alpha=0.25)

    # Date formatting
    ax.xaxis.set_major_formatter(DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Legend and layout
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    plt.tight_layout()
    plt.show()
|
93 |
+
|