# 11.2_evaluate_multilabel.py
import os
import json
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from datasets import load_from_disk
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
from torch.utils.data import default_collate
from sklearn.metrics import classification_report, multilabel_confusion_matrix

# === Paths
MODEL_DIR = Path("models/multilabel/")
DATASET_DIR = Path("data/processed/dataset_multilabel_top30")
TOP_RULES_PATH = Path("data/metadata/top_rules.json")
OUT_DIR = MODEL_DIR
REPORT_CSV = OUT_DIR / "classification_report.csv"
REPORT_JSON = OUT_DIR / "metrics.json"
CONF_MATRIX_PNG = OUT_DIR / "confusion_matrix_multilabel.png"

# === Data collator casting labels to float32 (expected by the multilabel BCE-style loss)
def collate_fn(batch):
    batch = default_collate(batch)
    batch["labels"] = batch["labels"].float()
    return batch

# === Load the top_rules list (one label name per output column)
with open(TOP_RULES_PATH) as f:
    top_rules = json.load(f)

# === Load model + tokenizer
print("📂 Loading model...")
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
except Exception:
    print("⚠️ No tokenizer found in the model directory; downloading from microsoft/codebert-base")
    tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
    tokenizer.save_pretrained(MODEL_DIR)

# === Load the dataset and build a Trainer
dataset = load_from_disk(str(DATASET_DIR))
trainer = Trainer(model=model, data_collator=collate_fn)

# === Prediction
print("🔍 Predicting on the test set...")
predictions = trainer.predict(dataset["test"].with_format("torch"))
# Independent sigmoid per label, then a fixed 0.5 decision threshold
probs = torch.sigmoid(torch.tensor(predictions.predictions)).numpy()
y_pred = (probs > 0.5).astype(int)
y_true = predictions.label_ids

# === Classification report
print("📊 Classification report:")
report_dict = classification_report(
    y_true, y_pred, target_names=top_rules, zero_division=0, output_dict=True
)
report_text = classification_report(y_true, y_pred, target_names=top_rules, zero_division=0)
print(report_text)

# === Save reports
pd.DataFrame(report_dict).transpose().to_csv(REPORT_CSV)
with open(REPORT_JSON, "w") as f:
    json.dump(report_dict, f, indent=2)
print(f"💾 Saved CSV report: {REPORT_CSV}")
print(f"💾 Saved JSON metrics: {REPORT_JSON}")

# === Per-label confusion matrices + label support plot
print("🧱 Generating multilabel confusion matrix...")
# One 2x2 (TN/FP/FN/TP) matrix per label, kept available for further analysis
mcm = multilabel_confusion_matrix(y_true, y_pred)
# The saved figure is a bar chart of per-label support in the test set
support = y_true.sum(axis=0).astype(int)
fig, ax = plt.subplots(figsize=(12, 8))
bars = plt.barh(range(len(top_rules)), support)
plt.yticks(range(len(top_rules)), top_rules)
plt.xlabel("Number of occurrences in the test set")
plt.title("🔢 Distribution of rule occurrences in the test set")
for i, bar in enumerate(bars):
    width = bar.get_width()
    plt.text(width + 1, bar.get_y() + bar.get_height() / 2, str(support[i]), va='center')
plt.tight_layout()
plt.savefig(CONF_MATRIX_PNG)
plt.close()
print(f"🖼️ Saved figure as PNG: {CONF_MATRIX_PNG}")
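
# --- Optional quick check (a minimal sketch, left commented out so the script's behavior is unchanged) ---
# Reads the metrics.json written above and prints the aggregate F1 scores; the
# "macro avg" / "micro avg" keys are assumed to follow sklearn's classification_report
# output_dict format for multilabel input.
#
# with open(REPORT_JSON) as f:
#     metrics = json.load(f)
# print(f"Macro F1: {metrics['macro avg']['f1-score']:.3f}")
# print(f"Micro F1: {metrics['micro avg']['f1-score']:.3f}")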