In [13]:
import json
import pandas as pd
import matplotlib.pyplot as plt

# Load the training log. The file is decoded as UTF-8 explicitly so the result
# does not depend on the platform's default text encoding.
with open("../models/binary/training_log.json", encoding="utf-8") as f:
    data = json.load(f)

# Convert the loaded data to a DataFrame.
df = pd.DataFrame(data)

# Split into train/eval rows: a row goes to train_df when its "loss" value is
# present and to eval_df when its "eval_loss" value is present.
# DataFrame.get returns its default when the column does not exist, so an
# all-NaN fallback is supplied: a log without one of the columns then yields an
# empty subset instead of `None.notna()` raising AttributeError.
no_values = pd.Series(index=df.index, dtype="float64")
train_df = df[df.get("loss", no_values).notna()].copy()
eval_df = df[df.get("eval_loss", no_values).notna()].copy()

print(f"🟩 Wczytano {len(train_df)} kroków treningu, 🔵 {len(eval_df)} ewaluacji")
# Last expression of the cell: shows the first rows of the full log.
df.head()
🟩 Wczytano 396 kroków treningu, 🔵 7 ewaluacji
Out[13]:
loss grad_norm learning_rate epoch step eval_loss eval_accuracy eval_precision eval_recall eval_f1 eval_runtime eval_samples_per_second eval_steps_per_second train_runtime train_samples_per_second train_steps_per_second total_flos train_loss
0 0.3831 0.071747 0.00002 0.02 50 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 0.2821 0.131042 0.00002 0.03 100 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 0.3882 0.649664 0.00002 0.05 150 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 0.1869 0.065614 0.00002 0.06 200 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 0.3480 0.131579 0.00002 0.08 250 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [14]:
# Training loss and evaluation loss plotted against the epoch on one set of
# axes, using the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    train_df["epoch"],
    train_df["loss"],
    label="Train Loss",
    alpha=0.5,
    marker=".",
    markersize=3,
)
ax.plot(
    eval_df["epoch"],
    eval_df["eval_loss"],
    label="Eval Loss",
    color="orange",
    marker="s",
)
ax.set_title("Strata (Loss) w czasie treningu")
ax.set_xlabel("Epoka")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
No description has been provided for this image
In [15]:
# Evaluation metrics plotted against the epoch, one line per metric.
# Each entry is (column of eval_df, legend label, marker).
METRIC_LINES = [
    ("eval_accuracy", "Accuracy", "o"),
    ("eval_f1", "F1 Score", "s"),
    ("eval_precision", "Precision", "^"),
    ("eval_recall", "Recall", "v"),
]
# Default y-axis window; the floor is lowered only when the data needs it.
Y_FLOOR, Y_CEIL = 0.90, 1.01
Y_MARGIN = 0.01

plt.figure(figsize=(12, 6))
for column, label, marker in METRIC_LINES:
    plt.plot(eval_df["epoch"], eval_df[column], label=label, marker=marker)
plt.title("Metryki ewaluacji w czasie treningu")
plt.xlabel("Epoka")
plt.ylabel("Wartość")

# A fixed lower limit of 0.90 would silently hide any metric below it, so the
# floor is extended to the smallest plotted value (minus a small margin) when
# that value falls under the default window. If every value is NaN the
# comparison with NaN is false and the default floor is kept.
lowest_value = eval_df[[column for column, _, _ in METRIC_LINES]].min().min()
plt.ylim(min(Y_FLOOR, lowest_value - Y_MARGIN), Y_CEIL)

plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [16]:
# Gradient norm (left y-axis) and learning rate (right y-axis) against the
# epoch, drawn on twin axes that share the x-axis.
GRAD_COLOR = "tab:red"
LR_COLOR = "tab:blue"

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: gradient norm.
ax1.set_xlabel("Epoka")
ax1.set_ylabel("Grad Norm", color=GRAD_COLOR)
ax1.plot(
    train_df["epoch"],
    train_df["grad_norm"],
    color=GRAD_COLOR,
    alpha=0.6,
    label="Grad Norm",
)
ax1.tick_params(axis="y", labelcolor=GRAD_COLOR)
ax1.legend(loc="upper left")

# Right axis: learning rate, dashed to set it apart from the gradient norm.
ax2 = ax1.twinx()
ax2.set_ylabel("Learning Rate", color=LR_COLOR)
ax2.plot(
    train_df["epoch"],
    train_df["learning_rate"],
    color=LR_COLOR,
    alpha=0.6,
    linestyle="--",
    label="LR",
)
ax2.tick_params(axis="y", labelcolor=LR_COLOR)
ax2.legend(loc="upper right")

# Title and grid are set on the twin axes explicitly; that is the axes the
# pyplot-level calls address once twinx() has created it.
ax2.set_title("Gradient i learning rate w czasie treningu")
fig.tight_layout()
ax2.grid(True)
plt.show()
No description has been provided for this image
In [17]:
# Select the evaluation row with the highest F1 score and print its key
# metrics under a header line.
SUMMARY_COLUMNS = [
    "epoch",
    "eval_loss",
    "eval_accuracy",
    "eval_precision",
    "eval_recall",
    "eval_f1",
]
best_idx = eval_df["eval_f1"].idxmax()
best_row = eval_df.loc[best_idx]
print("🏆 Najlepszy punkt ewaluacji:", best_row.loc[SUMMARY_COLUMNS], sep="\n")
🏆 Najlepszy punkt ewaluacji:
epoch             5.000000
eval_loss         0.054792
eval_accuracy     0.992121
eval_precision    0.993364
eval_recall       0.998000
eval_f1           0.995677
Name: 334, dtype: float64
In [18]:
# Write the train and eval subsets to CSV files under ../models/binary/,
# leaving out the DataFrame index.
csv_targets = (
    (train_df, "../models/binary/train_log.csv"),
    (eval_df, "../models/binary/eval_log.csv"),
)
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)
print("💾 Zapisano logi CSV")
💾 Zapisano logi CSV
In [ ]: