Spaces:
Running
Running
File size: 4,125 Bytes
aea73e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# -*- coding: utf-8 -*-
# Prepare MoNuSeg Dataset By converting and resorting files
#
# @ Fabian Hörst, [email protected]
# Institute for Artificial Intelligence in Medicine,
# University Medicine Essen
from PIL import Image
import xml.etree.ElementTree as ET
from skimage import draw
import numpy as np
from pathlib import Path
from typing import Union
import argparse
def _rasterize_annotation(annot_path: Path) -> np.ndarray:
    """Rasterize the polygon regions of one annotation XML file into an instance map.

    Args:
        annot_path (Path): Path to the XML annotation file

    Returns:
        np.ndarray: 1000 x 1000 float array (np.zeros default dtype). Pixels inside
            the n-th "Region" polygon of a "Regions" element hold the value n
            (counting from 1); untouched pixels stay 0. Later polygons overwrite
            earlier ones where they overlap.
    """
    instance_map = np.zeros((1000, 1000))

    # Only the first child of the XML root is evaluated
    annotation = ET.parse(annot_path).getroot()[0]
    for regions_node in annotation:
        if regions_node.tag != "Regions":
            continue
        # Instance numbering restarts at 1 for every "Regions" element
        instance_id = 1
        for region in regions_node:
            if region.tag != "Region":
                continue
            # The second child of a "Region" holds the vertex list
            vertices = region[1]
            xs = np.array([float(v.attrib["X"]) for v in vertices], dtype=float)
            ys = np.array([float(v.attrib["Y"]) for v in vertices], dtype=float)
            # draw.polygon expects (row, column) coordinates, i.e. (Y, X)
            fill_rows, fill_cols = draw.polygon(ys, xs, instance_map.shape)
            instance_map[fill_rows, fill_cols] = instance_id
            instance_id += 1
    return instance_map


def convert_monuseg(
    input_path: Union[Path, str], output_path: Union[Path, str]
) -> None:
    """Convert the MoNuSeg dataset to a new format (1000 -> 1024, tiff to png and xml to npy)

    For each of the parts "testing" and "training", every ``*.tif`` file in
    ``<input_path>/<part>/images`` is resized to 1024 x 1024 (LANCZOS) and stored as
    ``<output_path>/<part>/images/<stem>.png``. Every ``*.xml`` file in
    ``<input_path>/<part>/labels`` is rasterized into an instance map, resized to
    1024 x 1024 (NEAREST, so instance ids are not interpolated) and stored as
    ``<output_path>/<part>/labels/<stem>.npy``. Output folders are created if missing.

    Args:
        input_path (Union[Path, str]): Input dataset
        output_path (Union[Path, str]): Output path
    """
    input_path = Path(input_path)
    output_path = Path(output_path)
    output_path.mkdir(exist_ok=True, parents=True)

    # testing and training
    for part in ("testing", "training"):
        print(f"Prepare: {part}")
        input_path_part = input_path / part
        image_out_dir = output_path / part / "images"
        label_out_dir = output_path / part / "labels"
        image_out_dir.mkdir(exist_ok=True, parents=True)
        label_out_dir.mkdir(exist_ok=True, parents=True)

        # images
        for img_path in sorted((input_path_part / "images").glob("*.tif")):
            # Context manager releases the source file handle right after resizing
            # instead of keeping one handle open per processed image
            with Image.open(img_path) as loaded_image:
                resized = loaded_image.resize(
                    (1024, 1024), resample=Image.Resampling.LANCZOS
                )
            resized.save(image_out_dir / f"{img_path.stem}.png")

        # masks
        for annot_path in sorted((input_path_part / "labels").glob("*.xml")):
            inst_image = Image.fromarray(_rasterize_annotation(annot_path))
            resized_mask = np.array(
                inst_image.resize((1024, 1024), resample=Image.Resampling.NEAREST)
            )
            np.save(label_out_dir / f"{annot_path.stem}.npy", resized_mask)
    print("Finished")
# Command line interface: both paths are mandatory string options
parser = argparse.ArgumentParser(
    description="Convert the MoNuSeg dataset",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
for _flag, _help_text in (
    ("--input_path", "Input path of the original MoNuSeg dataset"),
    ("--output_path", "Output path to store the processed MoNuSeg dataset"),
):
    parser.add_argument(_flag, type=str, required=True, help=_help_text)
# Keep the module namespace free of the loop helpers
del _flag, _help_text
if __name__ == "__main__":
    # Script entry point: read both paths from the command line and run the conversion
    cli_args = parser.parse_args()
    convert_monuseg(
        input_path=Path(cli_args.input_path),
        output_path=Path(cli_args.output_path),
    )
|