|
|
|
"""
|
|
Validate a trained YOLOv5 segment model on a segment dataset.
|
|
|
|
Usage:
|
|
$ bash data/scripts/get_coco.sh --val --segments # download COCO-segments val split (1G, 5000 images)
|
|
$ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate COCO-segments
|
|
|
|
Usage - formats:
|
|
$ python segment/val.py --weights yolov5s-seg.pt # PyTorch
|
|
yolov5s-seg.torchscript # TorchScript
|
|
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
|
|
yolov5s-seg_openvino_model # OpenVINO
|
|
yolov5s-seg.engine # TensorRT
|
|
yolov5s-seg.mlmodel # CoreML (macOS-only)
|
|
yolov5s-seg_saved_model # TensorFlow SavedModel
|
|
yolov5s-seg.pb # TensorFlow GraphDef
|
|
yolov5s-seg.tflite # TensorFlow Lite
|
|
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
|
|
yolov5s-seg_paddle_model # PaddlePaddle
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
from multiprocessing.pool import ThreadPool
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import torch
|
|
from tqdm import tqdm
|
|
|
|
# Resolve this script's absolute location and the directory one level above its folder.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]
# Make ROOT importable so the project-local imports further down this file can resolve.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
# Rebind ROOT relative to the current working directory (done after the sys.path insert,
# which uses the absolute form).
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
|
|
|
|
import torch.nn.functional as F
|
|
|
|
from models.common import DetectMultiBackend
|
|
from models.yolo import SegmentationModel
|
|
from utils.callbacks import Callbacks
|
|
from utils.general import (
|
|
LOGGER,
|
|
NUM_THREADS,
|
|
TQDM_BAR_FORMAT,
|
|
Profile,
|
|
check_dataset,
|
|
check_img_size,
|
|
check_requirements,
|
|
check_yaml,
|
|
coco80_to_coco91_class,
|
|
colorstr,
|
|
increment_path,
|
|
non_max_suppression,
|
|
print_args,
|
|
scale_boxes,
|
|
xywh2xyxy,
|
|
xyxy2xywh,
|
|
)
|
|
from utils.metrics import ConfusionMatrix, box_iou
|
|
from utils.plots import output_to_target, plot_val_study
|
|
from utils.segment.dataloaders import create_dataloader
|
|
from utils.segment.general import mask_iou, process_mask, process_mask_native, scale_image
|
|
from utils.segment.metrics import Metrics, ap_per_class_box_and_mask
|
|
from utils.segment.plots import plot_images_and_masks
|
|
from utils.torch_utils import de_parallel, select_device, smart_inference_mode
|
|
|
|
|
|
def save_one_txt(predn, save_conf, shape, file):
    """Append one image's detections to a text file, one detection per line.

    Each line is ``cls x y w h`` where the box is converted with ``xyxy2xywh`` and divided by
    ``shape[[1, 0, 1, 0]]``; the confidence is appended when ``save_conf`` is true.

    Args:
        predn: 2-D tensor of detections whose first six columns are ``x1, y1, x2, y2, conf, cls``.
            Extra trailing columns are ignored; the validation loop in this file passes a tensor
            that still carries per-detection mask coefficients after column 5.
        save_conf: Whether to append the confidence value to each line.
        shape: Image shape, indexed as ``[1, 0, 1, 0]`` to build the divisor (presumably
            ``(height, width)`` - confirm against the dataloader).
        file: Output path; opened in append mode. Nothing is written (and no file is created)
            when ``predn`` has no rows.
    """
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain taken from the image shape
    lines = []
    # Only the first six columns describe the box/score/class; without this slice the starred
    # unpacking would swallow any extra columns into `xyxy` and `.view(1, 4)` would fail.
    for *xyxy, conf, cls in predn[:, :6].tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
        row = (cls, *xywh, conf) if save_conf else (cls, *xywh)
        lines.append(("%g " * len(row)).rstrip() % row + "\n")
    if lines:
        # Open the file once per image rather than once per detection.
        with open(file, "a") as f:
            f.writelines(lines)
|
|
|
|
|
|
def save_one_json(predn, jdict, path, class_map, pred_masks):
    """
    Append one COCO-style result record per detection of a single image to `jdict`.

    Each record holds the image id (the file stem, as int when numeric), the mapped category id, the box, the score
    and the run-length-encoded mask.

    Example record: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236,
    "segmentation": {...}}.
    """
    from pycocotools.mask import encode

    def single_encode(x):
        """Run-length encode one 2-D mask and decode its `counts` bytes to str so the result is JSON serializable."""
        rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
        rle["counts"] = rle["counts"].decode("utf-8")
        return rle

    stem = path.stem
    image_id = int(stem) if stem.isnumeric() else stem

    box = xyxy2xywh(predn[:, :4])
    box[:, :2] -= box[:, 2:] / 2  # shift the first two columns by half of the last two

    # Move the last axis to the front so that iterating yields one mask at a time.
    pred_masks = np.transpose(pred_masks, (2, 0, 1))
    with ThreadPool(NUM_THREADS) as pool:
        rles = pool.map(single_encode, pred_masks)

    for idx, (det, xywh) in enumerate(zip(predn.tolist(), box.tolist())):
        record = {
            "image_id": image_id,
            "category_id": class_map[int(det[5])],
            "bbox": [round(v, 3) for v in xywh],
            "score": round(det[4], 5),
            "segmentation": rles[idx],
        }
        jdict.append(record)
|
|
|
|
|
|
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Return the correct-prediction matrix for one image.

    Arguments:
        detections (array[N, 6+]), x1, y1, x2, y2, conf, class (only columns 0-3 and 5 are read here)
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds
        pred_masks, gt_masks: predicted / ground-truth masks, used only when `masks` is True
        overlap: when True, `gt_masks` is expanded into one binary mask per label by comparing against index + 1
        masks: use mask IoU instead of box IoU
    Returns:
        correct (bool tensor[N, len(iouv)]) on the device of `iouv`.
    """
    if masks:
        if overlap:
            n_labels = len(labels)
            ids = torch.arange(n_labels, device=gt_masks.device).view(n_labels, 1, 1) + 1
            gt_masks = gt_masks.repeat(n_labels, 1, 1)
            gt_masks = torch.where(gt_masks == ids, 1.0, 0.0)
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # Resize ground truth to the prediction resolution, then re-binarize in place.
            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
            gt_masks = gt_masks.gt_(0.5)
        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
    else:
        iou = box_iou(labels[:, 1:], detections[:, :4])

    correct = np.zeros((detections.shape[0], iouv.shape[0]), dtype=bool)
    same_class = labels[:, 0:1] == detections[:, 5]
    for col, threshold in enumerate(iouv):
        label_idx, det_idx = torch.where((iou >= threshold) & same_class)
        n_pairs = label_idx.shape[0]
        if not n_pairs:
            continue
        # Rows of `matches` are [label index, detection index, iou].
        pair_iou = iou[label_idx, det_idx][:, None]
        matches = torch.cat((torch.stack((label_idx, det_idx), 1), pair_iou), 1).cpu().numpy()
        if n_pairs > 1:
            # Sort by IoU (descending), then keep one row per detection and one row per label.
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        correct[matches[:, 1].astype(int), col] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
|
|
|
|
|
|
@smart_inference_mode()
def run(
    data,
    weights=None,
    batch_size=32,
    imgsz=640,
    conf_thres=0.001,
    iou_thres=0.6,
    max_det=300,
    task="val",
    device="",
    workers=8,
    single_cls=False,
    augment=False,
    verbose=False,
    save_txt=False,
    save_hybrid=False,
    save_conf=False,
    save_json=False,
    project=ROOT / "runs/val-seg",
    name="exp",
    exist_ok=False,
    half=True,
    dnn=False,
    model=None,
    dataloader=None,
    save_dir=Path(""),
    plots=True,
    overlap=False,
    mask_downsample_ratio=1,
    compute_loss=None,
    callbacks=Callbacks(),
):
    """Validate a YOLOv5 segmentation model on a dataset and report box and mask metrics.

    When ``model`` is given, that in-memory model is evaluated with the supplied ``dataloader`` and
    ``save_dir``. Otherwise ``weights`` are loaded through ``DetectMultiBackend``, an output
    directory is created under ``project/name`` and a dataloader is built for ``task``.

    Args:
        data: Dataset description. Passed through ``check_dataset`` when ``model`` is None; afterwards
            it is read as a mapping (``data.get("val")``, ``data["nc"]``, ``data[task]``).
        weights: Model path(s) to load when ``model`` is None; its stem also names the predictions JSON.
        batch_size: Batch size. Replaced by the backend's value for engine models and forced to 1 for
            backends that are neither PyTorch nor TorchScript.
        imgsz: Inference image size, validated against the model stride when loading weights.
        conf_thres: Confidence threshold passed to NMS.
        iou_thres: IoU threshold passed to NMS.
        max_det: Maximum detections per image passed to NMS.
        task: Dataset split key. Values other than "train"/"val"/"test" fall back to "val"; "speed"
            additionally disables padding and rectangular batches.
        device: Device spec passed to ``select_device`` when loading weights.
        workers: Dataloader worker count.
        single_cls: Treat the dataset as single-class (class column of predictions is zeroed).
        augment: Forwarded to the model call when no ``compute_loss`` is given.
        verbose: Log per-class results even when there are 50 or more classes or a model was passed in.
        save_txt: Write per-image ``labels/<stem>.txt`` files.
        save_hybrid: Pass the ground-truth labels to NMS.
        save_conf: Include confidences in the text files.
        save_json: Collect COCO-style results, write them to JSON and try a pycocotools evaluation.
        project: Parent directory of the output directory (weights-loading mode only).
        name: Output directory name (weights-loading mode only).
        exist_ok: Passed to ``increment_path``.
        half: Request FP16. Disabled on CPU for in-memory models; for loaded weights the backend's
            ``fp16`` attribute decides.
        dnn: Passed to ``DetectMultiBackend``.
        model: In-memory model; when not None the function runs in "training" mode.
        dataloader: Dataloader to use in training mode.
        save_dir: Output directory to use in training mode.
        plots: Produce confusion matrix, metric plots and the first three batches' image plots.
        overlap: Ground-truth masks are stored overlapped (one index map per image).
        mask_downsample_ratio: Passed to ``create_dataloader``.
        compute_loss: Optional callable; when given, its second return value is accumulated as loss.
        callbacks: Accepted for interface compatibility; not used in this function body.

    Returns:
        A 3-tuple ``(results, maps, t)`` where ``results`` is
        ``(mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask, *losses)``,
        ``maps`` is ``metrics.get_maps(nc)`` and ``t`` holds the per-image pre-process, inference and
        NMS times in milliseconds.
    """
    if save_json:
        check_requirements("pycocotools>=2.0.6")
        process = process_mask_native
    else:
        process = process_mask

    # Initialize/load model and set device
    training = model is not None
    if training:  # an in-memory model was supplied by the caller
        device, pt, jit, engine = next(model.parameters()).device, True, False, False
        half &= device.type != "cpu"  # no half precision on CPU
        if half:
            model.half()
        else:
            model.float()
        nm = de_parallel(model).model[-1].nm  # number of mask coefficients
    else:  # load from weights
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)
        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)
        half = model.fp16
        nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1
                LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")

        # Data
        data = check_dataset(data)

    # Configure
    model.eval()
    cuda = device.type != "cpu"
    is_coco = isinstance(data.get("val"), str) and data["val"].endswith(f"coco{os.sep}val2017.txt")
    nc = 1 if single_cls else int(data["nc"])
    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # IoU thresholds 0.5 ... 0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        if pt and not single_cls:  # check that --weights and --data agree on the class count
            ncm = model.model.nc
            assert ncm == nc, (
                f"{weights} ({ncm} classes) trained on different --data than what you passed ({nc} "
                f"classes). Pass correct combination of --weights and --data that are trained together."
            )
        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))
        pad, rect = (0.0, False) if task == "speed" else (0.5, pt)
        task = task if task in ("train", "val", "test") else "val"
        dataloader = create_dataloader(
            data[task],
            imgsz,
            batch_size,
            stride,
            single_cls,
            pad=pad,
            rect=rect,
            workers=workers,
            prefix=colorstr(f"{task}: "),
            overlap_mask=overlap,
            mask_downsample_ratio=mask_downsample_ratio,
        )[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = model.names if hasattr(model, "names") else model.module.names
    if isinstance(names, (list, tuple)):
        names = dict(enumerate(names))
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
    s = ("%22s" + "%11s" * 10) % (
        "Class",
        "Images",
        "Instances",
        "Box(P",
        "R",
        "mAP50",
        "mAP50-95)",
        "Mask(P",
        "R",
        "mAP50",
        "mAP50-95)",
    )
    dt = Profile(device=device), Profile(device=device), Profile(device=device)  # pre-process, inference, NMS
    metrics = Metrics()
    loss = torch.zeros(4, device=device)
    jdict, stats = [], []

    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)
    for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
        # Pre-process
        with dt[0]:
            if cuda:
                im = im.to(device, non_blocking=True)
                targets = targets.to(device)
                masks = masks.to(device)
            masks = masks.float()
            im = im.half() if half else im.float()
            im /= 255  # scale pixel values by 1/255
            nb, _, height, width = im.shape

        # Inference
        with dt[1]:
            preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None)

        # Loss
        if compute_loss:
            loss += compute_loss((train_out, protos), targets, masks)[1]

        # NMS
        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # boxes to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []
        with dt[2]:
            preds = non_max_suppression(
                preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm
            )

        # Metrics
        plot_masks = []
        for si, (pred, proto) in enumerate(zip(preds, protos)):
            labels = targets[targets[:, 0] == si, 1:]
            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
            path, shape = Path(paths[si]), shapes[si][0]
            correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device)
            correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device)
            seen += 1

            if npr == 0:
                if nl:
                    stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
                    if plots:
                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                continue

            # Masks
            midx = [si] if overlap else targets[:, 0] == si
            gt_masks = masks[midx]
            pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:])

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # boxes to original image space

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)
                correct_bboxes = process_batch(predn, labelsn, iouv)
                correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if plots and batch_i < 3:
                plot_masks.append(pred_masks[:15])  # keep at most 15 masks per image for plotting

            # Save/log
            if save_txt:
                save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / f"{path.stem}.txt")
            if save_json:
                pred_masks = scale_image(
                    im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]
                )
                save_one_json(predn, jdict, path, class_map, pred_masks)

        # Plot images
        if plots and batch_i < 3:
            if len(plot_masks):
                plot_masks = torch.cat(plot_masks, dim=0)
            plot_images_and_masks(im, targets, masks, paths, save_dir / f"val_batch{batch_i}_labels.jpg", names)
            plot_images_and_masks(
                im,
                output_to_target(preds, max_det=15),
                plot_masks,
                paths,
                save_dir / f"val_batch{batch_i}_pred.jpg",
                names,
            )

    # Compute metrics
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]
    if len(stats) and stats[0].any():
        results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
        metrics.update(results)
    # `stats` is an empty list when no image produced a label or a prediction; indexing it would
    # raise IndexError before the "no labels" warning below could be emitted.
    nt = np.bincount(stats[4].astype(int), minlength=nc) if len(stats) else np.zeros(nc, dtype=int)

    # Print results
    pf = "%22s" + "%11i" * 2 + "%11.3g" * 8
    LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
    if nt.sum() == 0:
        LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(metrics.ap_class_index):
            LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))

    # Print speeds
    t = tuple(x.t / seen * 1e3 for x in dt)  # milliseconds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))

    mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results()

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ""
        anno_json = str(Path("../datasets/coco/annotations/instances_val2017.json"))
        pred_json = str(save_dir / f"{w}_predictions.json")
        LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)
            pred = anno.loadRes(pred_json)
            results = []
            for coco_eval in COCOeval(anno, pred, "bbox"), COCOeval(anno, pred, "segm"):
                if is_coco:
                    coco_eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]
                coco_eval.evaluate()
                coco_eval.accumulate()
                coco_eval.summarize()
                results.extend(coco_eval.stats[:2])
            map_bbox, map50_bbox, map_mask, map50_mask = results
        except Exception as e:  # best effort: the in-house metrics above are still returned
            LOGGER.info(f"pycocotools unable to run: {e}")

    # Return results
    model.float()  # restore full precision before handing the model back
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask
    return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t
|
|
|
|
|
|
def parse_opt():
    """Build the command-line parser for segmentation validation, parse the arguments and return the namespace.

    After parsing, `data` is passed through `check_yaml`, `save_txt` is switched on whenever `save_hybrid` is set,
    and the resulting options are printed with `print_args`.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Model / data selection
    add("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
    add("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
    add("--batch-size", type=int, default=32, help="batch size")
    add("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")

    # Inference thresholds
    add("--conf-thres", type=float, default=0.001, help="confidence threshold")
    add("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
    add("--max-det", type=int, default=300, help="maximum detections per image")

    # Runtime
    add("--task", default="val", help="train, val, test, speed or study")
    add("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    add("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
    add("--single-cls", action="store_true", help="treat as single-class dataset")
    add("--augment", action="store_true", help="augmented inference")
    add("--verbose", action="store_true", help="report mAP by class")

    # Outputs
    add("--save-txt", action="store_true", help="save results to *.txt")
    add("--save-hybrid", action="store_true", help="save label+prediction hybrid results to *.txt")
    add("--save-conf", action="store_true", help="save confidences in --save-txt labels")
    add("--save-json", action="store_true", help="save a COCO-JSON results file")
    add("--project", default=ROOT / "runs/val-seg", help="save results to project/name")
    add("--name", default="exp", help="save to project/name")
    add("--exist-ok", action="store_true", help="existing project/name ok, do not increment")

    # Backend
    add("--half", action="store_true", help="use FP16 half-precision inference")
    add("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")

    opt = parser.parse_args()
    opt.data = check_yaml(opt.data)
    opt.save_txt |= opt.save_hybrid  # hybrid output implies writing txt files
    print_args(vars(opt))
    return opt
|
|
|
|
|
|
def main(opt):
    """Dispatch a validation run according to ``opt.task``.

    * "train" / "val" / "test": a single ``run`` with the parsed options (warnings are logged for
      ``conf_thres > 0.001`` and for ``save_hybrid``).
    * "speed": one ``run`` per weights file with ``conf_thres=0.25``, ``iou_thres=0.45``, no JSON
      output and no plots.
    * "study": for each weights file, ``run`` at image sizes 256..1536 in steps of 128, saving the
      results and timings to ``study_<data>_<weights>.txt``; the study files are then zipped into
      ``study.zip`` and plotted.

    Args:
        opt: Namespace produced by ``parse_opt``; it is mutated in place for "speed" and "study".

    Raises:
        NotImplementedError: If ``opt.task`` is none of the tasks listed above.
    """
    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))

    if opt.task in ("train", "val", "test"):  # run normally
        if opt.conf_thres > 0.001:
            LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
        if opt.save_hybrid:
            LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
        run(**vars(opt))

    else:
        weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
        opt.half = torch.cuda.is_available() and opt.device != "cpu"
        if opt.task == "speed":  # speed benchmarks
            opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
            for opt.weights in weights:
                run(**vars(opt), plots=False)

        elif opt.task == "study":  # results vs. image size
            for opt.weights in weights:
                f = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt"
                x, y = list(range(256, 1536 + 128, 128)), []  # image sizes, collected rows
                for opt.imgsz in x:
                    LOGGER.info(f"\nRunning {f} --imgsz {opt.imgsz}...")
                    r, _, t = run(**vars(opt), plots=False)
                    y.append(r + t)  # results followed by timings
                np.savetxt(f, y, fmt="%10.4g")
            # With an argument list no shell is involved, so a "study_*.txt" wildcard would be handed
            # to zip as a literal name; expand it here instead.
            study_files = sorted(str(p) for p in Path().glob("study_*.txt"))
            subprocess.run(["zip", "-r", "study.zip", *study_files])
            plot_val_study(x=x)
        else:
            raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')
|
|
|
|
|
|
# Script entry point: parse the command-line options, then dispatch on the requested task.
if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
|
|
|