Spaces:
Runtime error
Runtime error
| import json | |
| from collections import defaultdict | |
| from pathlib import Path | |
| import cv2 | |
| import numpy as np | |
| from tqdm import tqdm | |
| from ultralytics.yolo.utils.checks import check_requirements | |
| from ultralytics.yolo.utils.files import make_dirs | |
def coco91_to_coco80_class():
    """Build the lookup table from 91-index COCO category IDs to contiguous 80-index class IDs.

    Returns:
        (list): A list of 91 entries. Position ``i`` stands for the original COCO category ID ``i + 1`` and holds the
            matching 0-based 80-class index, or None for the 11 category IDs that have no 80-class counterpart.
    """
    # 1-based category IDs that are skipped in the 80-class numbering
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91}

    table = []
    next_cls = 0
    for category_id in range(1, 92):
        if category_id in unused_ids:
            table.append(None)
        else:
            table.append(next_cls)
            next_cls += 1
    return table
def _normalized_polygon_row(segmentation, cls, w, h):
    """Return ``[cls, x1, y1, ...]`` with coordinates divided by the image size, or ``[]`` if there is no polygon."""
    if len(segmentation) == 0:
        return []
    if isinstance(segmentation, dict):  # RLE mask: trace it into polygons first
        segmentation = rle2polygon(segmentation)
        if len(segmentation) == 0:
            return []
    if len(segmentation) > 1:
        # several polygons for one object: join them into a single outline
        points = np.concatenate(merge_multi_segment(segmentation), axis=0)
    else:
        points = np.array(segmentation[0]).reshape(-1, 2)
    if points.shape[0] == 0:
        return []
    return [cls] + (points / np.array([w, h])).reshape(-1).tolist()


def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keypoints=False, cls91to80=True):
    """Converts COCO dataset annotations to YOLO-format label text files.

    Every ``*.json`` file found directly inside ``labels_dir`` is processed. For each image that has annotations, a
    text file ``<save_dir>/labels/<json stem without 'instances_'>/<image file name>.txt`` receives one
    space-separated row per kept annotation. Crowd annotations, boxes with non-positive width or height, and
    annotations whose ``cls cx cy w h`` row duplicates an earlier one of the same image are skipped. All coordinates
    are normalized by the image width and height.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
        use_segments (bool, optional): Write ``cls x1 y1 x2 y2 ...`` polygon rows instead of box rows. Annotations
            without a usable segmentation fall back to their box row.
        use_keypoints (bool, optional): Write ``cls cx cy w h`` followed by the keypoint triplets (x and y normalized,
            third value unchanged). Takes precedence over ``use_segments``. Annotations without keypoints fall back
            to their box row.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs. When
            False the class is ``category_id - 1``.

    Note:
        No exception is raised when no ``*.json`` files are found; nothing is converted in that case.
        Label files are opened in append mode.

    Example Usage:
        convert_coco(labels_dir='../coco/annotations/', use_segments=True, use_keypoints=True, cls91to80=True)
    """
    save_dir = make_dirs('yolo_labels')  # output directory (presumably created by make_dirs; verify in that helper)
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(Path(labels_dir).resolve().glob('*.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Key images by their raw id. Formatting ids with '%g' keeps only 6 significant digits, so any two ids
        # >= 1,000,000 that share their leading digits would map to the same key.
        images = {x['id']: x for x in data['images']}

        # Group annotations by the image they belong to
        img_to_anns = defaultdict(list)
        for ann in data['annotations']:
            img_to_anns[ann['image_id']].append(ann)

        for img_id, anns in tqdm(img_to_anns.items(), desc=f'Annotations {json_file}'):
            img = images[img_id]
            h, w, file_name = img['height'], img['width'], img['file_name']

            # Exactly one output row per kept annotation. Building a single list (instead of parallel
            # bbox/segment/keypoint lists filled under different conditions) guarantees that a row's class, box,
            # polygon and keypoints always come from the same annotation.
            rows = []
            seen_boxes = set()
            for ann in anns:
                if ann.get('iscrowd'):
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann['bbox'], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # skip boxes whose width or height is not positive
                    continue

                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
                box = [cls] + box.tolist()
                box_key = tuple(box)
                if box_key in seen_boxes:  # duplicate annotation for this image
                    continue
                seen_boxes.add(box_key)

                row = box
                if use_keypoints:
                    if ann.get('keypoints') is not None:
                        k = (np.array(ann['keypoints']).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
                        row = box + k  # cls, box, keypoints
                elif use_segments and ann.get('segmentation') is not None:
                    polygon_row = _normalized_polygon_row(ann['segmentation'], cls, w, h)
                    if polygon_row:
                        row = polygon_row  # cls, polygon
                rows.append(row)

            # Write
            with open((fn / file_name).with_suffix('.txt'), 'a') as file:
                for row in rows:
                    file.write(('%g ' * len(row)).rstrip() % tuple(row) + '\n')
def rle2polygon(segmentation):
    """
    Trace a Run-Length Encoded (RLE) mask into polygon outlines.

    The mask is decoded with pycocotools, its external contours are extracted with OpenCV, and each contour is
    simplified with a tolerance of 0.1% of its closed perimeter.

    Args:
        segmentation (dict, list): RLE mask representation of the object segmentation.

    Returns:
        (list): One flat coordinate list ``[x1, y1, x2, y2, ...]`` per external contour.

    Note:
        Requires the 'pycocotools' package to be installed.
    """
    check_requirements('pycocotools')
    from pycocotools import mask

    binary = mask.decode(segmentation)
    binary[binary > 0] = 255  # turn every foreground pixel into 255 before contour tracing
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
    return [
        cv2.approxPolyDP(outline, 0.001 * cv2.arcLength(outline, True), True).flatten().tolist()
        for outline in outlines]
def min_index(arr1, arr2):
    """
    Locate the closest pair of points between two sets of 2D points.

    Args:
        arr1 (np.array): A NumPy array of shape (N, 2) representing N 2D points.
        arr2 (np.array): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): ``(i, j)`` such that ``arr1[i]`` and ``arr2[j]`` have the smallest squared Euclidean distance.
            On ties the first pair in row-major order is returned.
    """
    # Broadcast to an (N, M, 2) table of coordinate differences, then reduce to (N, M) squared distances
    deltas = arr1[:, None, :] - arr2[None, :, :]
    sq_dist = (deltas ** 2).sum(-1)
    flat_pos = np.argmin(sq_dist, axis=None)
    return np.unravel_index(flat_pos, sq_dist.shape)
def merge_multi_segment(segments):
    """
    Chain several polygons of one object into a single outline.

    Consecutive polygons are linked at their closest pair of points (found with min_index). The result is built in
    two passes: a forward pass walks the polygons first to last, and a backward pass walks the middle polygons in
    reverse to add the remainder of each one, so that concatenating the returned pieces gives one path.

    Args:
        segments (List[List]): Original segmentations in COCO's JSON file.
            Each element is a list of coordinates, like [segmentation1, segmentation2,...].

    Returns:
        s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
    """
    polys = [np.array(seg).reshape(-1, 2) for seg in segments]
    last = len(polys) - 1

    # links[i] holds the point indexes where polygon i touches its neighbours:
    # one index for the first and last polygon, two for every middle polygon
    links = [[] for _ in polys]
    for prev, (poly_a, poly_b) in enumerate(zip(polys[:-1], polys[1:])):
        near_a, near_b = min_index(poly_a, poly_b)
        links[prev].append(near_a)
        links[prev + 1].append(near_b)

    merged = []

    # Forward pass
    for i, link in enumerate(links):
        # Middle polygons carry two indexes; put them in ascending order and flip the point order with them
        # NOTE(review): the stored indexes are not remapped after the flip — confirm this is intended
        if len(link) == 2 and link[0] > link[1]:
            link = link[::-1]
            polys[i] = polys[i][::-1, :]

        # Start the polygon at its first link point and close it by repeating that point at the end
        rolled = np.roll(polys[i], -link[0], axis=0)
        polys[i] = np.concatenate([rolled, rolled[:1]])

        if i == 0 or i == last:
            merged.append(polys[i])  # first and last polygons are kept whole
        else:
            merged.append(polys[i][:link[1] - link[0] + 1])  # middle: only the stretch between the two links

    # Backward pass: add the rest of every middle polygon, from the last middle one back to the first
    for i in range(last - 1, 0, -1):
        link = links[i]
        merged.append(polys[i][abs(link[1] - link[0]):])

    return merged
def delete_dsstore(path='../datasets'):
    """Delete Apple .DS_Store files in the specified directory and its subdirectories.

    The list of matched files is printed before they are removed.

    Args:
        path (str, optional): Root directory to search recursively.
    """
    # The real file name is '.DS_Store' (capital S). A literal '.DS_store' pattern never matches it on
    # case-sensitive filesystems, so accept both spellings.
    files = list(Path(path).rglob('.DS_[Ss]tore'))
    print(files)
    for f in files:
        f.unlink()
if __name__ == '__main__':
    # Script entry point: convert the COCO annotation JSON files into YOLO label files
    source = 'COCO'
    coco_options = dict(use_segments=False, use_keypoints=True, cls91to80=False)

    if source == 'COCO':
        convert_coco('../datasets/coco/annotations', **coco_options)  # directory with *.json