""" This script reads from a YAML file and downloads data from CVAT. """ import os import argparse import subprocess import shutil import yaml from pathlib import Path from cvat_dataset import CVATDataset from merge_cocos import merge from yolo_labels import get_yolo_labels HOME = os.getenv("APP_HOME") CVAT_TASKS = os.path.join(HOME, os.getenv("APP_CVAT_TASKS_YAML")) PYPREPROCESS = os.getenv("APP_PYPREPROCESS") import sys sys.path.append(HOME) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( 'cvat_url', type = str, help = 'cvat url' ) parser.add_argument( 'cvat_org', type = str, help = 'cvat organization' ) parser.add_argument( '-odir', '--output_dir', type = str, help = "path to download directory", default = "/data" ) args = parser.parse_args() with open(CVAT_TASKS, "r") as f: y = yaml.safe_load(f) TASK_IDS = y["task_ids"] NAMES = None if "names" in y: NAMES = y["names"] data_folder = Path(args.output_dir) data_folder.mkdir(parents=True, exist_ok=True) CVAT = CVATDataset( args.cvat_url, args.cvat_org, TASK_IDS, names = NAMES, dest_folder = data_folder ) CVAT.download_tasks() paths2imgs = [] paths2json = [] paths2dirs = [] for dataset in data_folder.rglob("*.zip"): dir_name = dataset.parent / dataset.stem paths2dirs.append(dir_name) paths2imgs.append(dir_name / "images") paths2json.append(dir_name / "annotations" / "instances_default.json") if dir_name.exists(): continue subprocess.call(['unzip', '-o', dataset, '-d', dir_name]) if PYPREPROCESS == 'true': # looks for the py script called: trainer_files/preprocess.py # this script is characteristic to the project from trainer_files.preprocess import preprocess_cvat paths2json, paths2imgs = preprocess_cvat(paths2dirs) # TODO: add debugging / assert script to make sure preprocess is done correctly # merge everything into a single json file if len(paths2json) > 1: merge( paths2json, paths2imgs, data_folder / 'merged_cocos', 'merged', verbose=True ) else: json_file = Path(paths2json[0]) shutil.copy( json_file.as_posix(), (json_file.parents[1] / 'merged.json').as_posix() ) shutil.move( json_file.parents[1].as_posix(), (data_folder / 'merged_cocos').as_posix() ) # yolo format - labels path2json = data_folder / 'merged_cocos' / 'merged.json' get_yolo_labels(path2json, use_segment=False)