In [13]:
import json
import pandas as pd
import matplotlib.pyplot as plt
# Load the training log. JSON is read as UTF-8 explicitly so the result does
# not depend on the platform's default locale encoding.
with open("../models/binary/training_log.json", encoding="utf-8") as f:
    data = json.load(f)

# Convert the parsed log entries into a DataFrame.
df = pd.DataFrame(data)

# Split into training-step rows (those with a "loss" value) and evaluation
# rows (those with an "eval_loss" value).
# DataFrame.get returns its default when the column is missing; an all-NaN
# fallback makes the mask all-False, so a log without e.g. evaluation entries
# yields an empty frame instead of failing with AttributeError on None.
no_values = pd.Series(float("nan"), index=df.index)
train_df = df[df.get("loss", no_values).notna()].copy()
eval_df = df[df.get("eval_loss", no_values).notna()].copy()

print(f"🟩 Wczytano {len(train_df)} kroków treningu, 🔵 {len(eval_df)} ewaluacji")
df.head()
🟩 Wczytano 396 kroków treningu, 🔵 7 ewaluacji
Out[13]:
loss | grad_norm | learning_rate | epoch | step | eval_loss | eval_accuracy | eval_precision | eval_recall | eval_f1 | eval_runtime | eval_samples_per_second | eval_steps_per_second | train_runtime | train_samples_per_second | train_steps_per_second | total_flos | train_loss | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.3831 | 0.071747 | 0.00002 | 0.02 | 50 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 0.2821 | 0.131042 | 0.00002 | 0.03 | 100 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 0.3882 | 0.649664 | 0.00002 | 0.05 | 150 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 0.1869 | 0.065614 | 0.00002 | 0.06 | 200 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 0.3480 | 0.131579 | 0.00002 | 0.08 | 250 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
In [14]:
# Training vs. evaluation loss over epochs, drawn on a single explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))

# Training loss: faint line with small dot markers.
ax.plot(
    train_df["epoch"],
    train_df["loss"],
    label="Train Loss",
    alpha=0.5,
    marker=".",
    markersize=3,
)
# Evaluation loss: orange line with square markers.
ax.plot(
    eval_df["epoch"],
    eval_df["eval_loss"],
    label="Eval Loss",
    color="orange",
    marker="s",
)

ax.set_title("Strata (Loss) w czasie treningu")
ax.set_xlabel("Epoka")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)

fig.tight_layout()
plt.show()
In [15]:
# Evaluation metrics over epochs.
# Maps each eval_df column to its (legend label, marker); dict order is the
# plotting order, so the default colour cycle is assigned as before.
metric_styles = {
    "eval_accuracy": ("Accuracy", "o"),
    "eval_f1": ("F1 Score", "s"),
    "eval_precision": ("Precision", "^"),
    "eval_recall": ("Recall", "v"),
}

plt.figure(figsize=(12, 6))
for column, (label, marker) in metric_styles.items():
    plt.plot(eval_df["epoch"], eval_df[column], label=label, marker=marker)

plt.title("Metryki ewaluacji w czasie treningu")
plt.xlabel("Epoka")
plt.ylabel("Wartość")

# Keep the zoomed-in 0.90-1.01 view when every value fits in it, but widen the
# lower limit when some metric falls below 0.90 so no point is silently cut
# off. If there is no data the minimum is NaN and min() keeps 0.90, because
# the comparison NaN < 0.90 is False.
lowest_value = eval_df[list(metric_styles)].min().min()
plt.ylim(min(0.90, lowest_value - 0.01), 1.01)

plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
In [16]:
# Gradient norm (left y-axis) and learning rate (right y-axis) on a shared
# epoch axis.
grad_color = "tab:red"
lr_color = "tab:blue"

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: gradient norm.
ax1.plot(
    train_df["epoch"],
    train_df["grad_norm"],
    color=grad_color,
    alpha=0.6,
    label="Grad Norm",
)
ax1.set_xlabel("Epoka")
ax1.set_ylabel("Grad Norm", color=grad_color)
ax1.tick_params(axis='y', labelcolor=grad_color)
ax1.legend(loc="upper left")

# Right axis: learning rate, drawn dashed on a twin of the left axis.
ax2 = ax1.twinx()
ax2.plot(
    train_df["epoch"],
    train_df["learning_rate"],
    color=lr_color,
    alpha=0.6,
    linestyle="--",
    label="LR",
)
ax2.set_ylabel("Learning Rate", color=lr_color)
ax2.tick_params(axis='y', labelcolor=lr_color)
ax2.legend(loc="upper right")

# The twin axis is the current axes at this point, so the title and the grid
# are attached to it.
ax2.set_title("Gradient i learning rate w czasie treningu")
fig.tight_layout()
ax2.grid(True)
plt.show()
In [17]:
# Report the evaluation row with the highest F1 score.
report_columns = [
    "epoch",
    "eval_loss",
    "eval_accuracy",
    "eval_precision",
    "eval_recall",
    "eval_f1",
]

best_idx = eval_df["eval_f1"].idxmax()  # index label of the top-F1 row
best_row = eval_df.loc[best_idx]

# One print call with a newline separator emits the header and the metrics
# on separate lines.
print("🏆 Najlepszy punkt ewaluacji:", best_row[report_columns], sep="\n")
🏆 Najlepszy punkt ewaluacji: epoch 5.000000 eval_loss 0.054792 eval_accuracy 0.992121 eval_precision 0.993364 eval_recall 0.998000 eval_f1 0.995677 Name: 334, dtype: float64
In [18]:
# Write the training and evaluation frames to CSV files, without the index
# column.
csv_targets = {
    "../models/binary/train_log.csv": train_df,
    "../models/binary/eval_log.csv": eval_df,
}
for csv_path, frame in csv_targets.items():
    frame.to_csv(csv_path, index=False)

print("💾 Zapisano logi CSV")
💾 Zapisano logi CSV
In [ ]: