test / plot_graph.py
Weyaxi's picture
add plot graph code
847efe3 verified
raw
history blame
3.27 kB
from huggingface_hub import snapshot_download
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.ndimage import gaussian_filter1d
# Import-time side effect: fetch the "Weyaxi/followers-leaderboard" dataset repo
# into ./followers-leaderboard, which is also the default `dataset_path` of
# plot_follower_comparison below. Requires network access when the module loads.
snapshot_download("Weyaxi/followers-leaderboard", local_dir="followers-leaderboard", repo_type="dataset", max_workers=32)
# Process-wide cache filled by plot_follower_comparison:
# dataset_path -> {snapshot datetime: DataFrame read from that snapshot's data.csv}.
global_dataset_dfs = {}
# Line colours, picked per author as COLORS[idx % len(COLORS)].
# NOTE(review): these appear to be the hex values of matplotlib's "tab10" palette.
COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
'#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
def _load_snapshot_frames(dataset_path):
    """Read every ``data.csv`` found under *dataset_path*, keyed by snapshot date.

    The date comes from the name of the folder holding the CSV: the first three
    dash-separated fields, cut at the first whitespace, parsed as ``DD-MM-YYYY``.
    Folders whose name does not parse, or whose CSV pandas cannot parse, are
    skipped. Returns a ``{datetime: DataFrame}`` dict (empty if nothing loads).
    """
    frames = {}
    for root, _, files in os.walk(dataset_path):
        if "data.csv" not in files:
            continue
        folder_name = os.path.basename(root)
        date_tokens = "-".join(folder_name.split("-")[:3]).split()
        if not date_tokens:
            # Empty / whitespace-only folder name: nothing to parse as a date.
            continue
        try:
            snapshot_date = datetime.strptime(date_tokens[0], "%d-%m-%Y")
            frames[snapshot_date] = pd.read_csv(os.path.join(root, "data.csv"))
        except (ValueError, pd.errors.ParserError):
            continue
    return frames


def _author_series(frames, author):
    """Return ``(dates, counts)`` for *author*, sorted by date, or ``None``.

    ``counts`` is a float64 array holding the first 'Number of Followers' value
    listed for the author in each snapshot that mentions them. Snapshots that
    lack the 'Author' or 'Number of Followers' column are ignored.
    """
    points = {}
    for snapshot_date, df in frames.items():
        if "Author" not in df.columns or "Number of Followers" not in df.columns:
            continue
        matches = df.loc[df["Author"] == author, "Number of Followers"]
        if not matches.empty:
            points[snapshot_date] = matches.iloc[0]
    if not points:
        return None
    dates = tuple(sorted(points))
    # float64 keeps large follower counts exact (float32 only up to 2**24).
    counts = np.array([points[d] for d in dates], dtype=np.float64)
    return dates, counts


def plot_follower_comparison(author_names, dataset_path="followers-leaderboard", smooth_sigma=1.5):
    """Plot follower counts over time for one or more authors.

    Args:
        author_names: A single author name or an iterable of names, matched
            exactly against the 'Author' column of each snapshot.
        dataset_path: Directory searched recursively for ``data.csv`` snapshots.
            Loaded snapshots are cached in ``global_dataset_dfs`` under this key.
        smooth_sigma: Sigma of the Gaussian filter applied to each series.
            ``0``, a negative value, or ``None`` disables smoothing.

    Returns:
        None. The figure is displayed with ``plt.show()``. If no requested
        author appears in any snapshot, a message is printed and no figure is
        created.
    """
    if isinstance(author_names, str):
        author_names = [author_names]

    # Load or retrieve cached DataFrames. An empty load (e.g. the directory
    # does not exist yet) is not cached, so a later call can still pick up data.
    if dataset_path in global_dataset_dfs:
        frames = global_dataset_dfs[dataset_path]
    else:
        frames = _load_snapshot_frames(dataset_path)
        if frames:
            global_dataset_dfs[dataset_path] = frames

    # Collect all series before touching matplotlib, so the "no data" path
    # does not leave an orphaned, never-closed figure behind.
    series = []
    for idx, author in enumerate(author_names):
        found = _author_series(frames, author)
        if found is not None:
            dates, counts = found
            series.append((idx, author, dates, counts))

    if not series:
        print("No valid data found for any authors.")
        return

    # Imported explicitly rather than reached through ``plt.matplotlib.dates``,
    # which only works if the submodule happens to be loaded already.
    import matplotlib.dates as mdates

    line_styles = ['-', '--', '-.', ':']
    markers = ['o', 's', 'D', '^', 'v']

    plt.figure(figsize=(14, 8))
    ax = plt.gca()

    for idx, author, dates, counts in series:
        # Apply Gaussian smoothing if requested
        if smooth_sigma and smooth_sigma > 0 and len(counts) > 1:
            counts = gaussian_filter1d(counts, sigma=smooth_sigma)

        # Styling is keyed on the author's position in the request, so an
        # author keeps the same look whether or not others had data.
        # Only the first five authors get markers.
        marker = markers[idx] if idx < len(markers) else None
        ax.plot(dates, counts,
                linestyle=line_styles[idx % len(line_styles)],
                color=COLORS[idx % len(COLORS)],
                marker=marker,
                markersize=7 if marker else 0,
                linewidth=2.5,
                markevery=0.1,
                alpha=0.9,
                label=author)

    # Plot configuration
    valid_authors = [author for _, author, _, _ in series]
    plt.title(f'Follower Trend Comparison ({", ".join(valid_authors)})', pad=20)
    plt.xlabel('Date', labelpad=15)
    plt.ylabel('Followers', labelpad=15)
    plt.ylim(bottom=0)
    plt.grid(True, alpha=0.25)

    # Date formatting
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
    plt.xticks(rotation=45, ha='right')

    # Legend and layout
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1), framealpha=0.9)
    plt.tight_layout()
    plt.show()