AIEM / trainer /utils /download_cvatdata.py
lhhj
initial ppush
463b952
raw
history blame
2.81 kB
"""
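This script reads the CVAT task ids (and optional names) from a YAML file,
downloads the corresponding tasks from CVAT, unpacks the downloaded
archives, merges their annotation JSON files into a single ``merged.json``
and finally generates YOLO-format labels from that merged file.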
"""
import os
import argparse
import subprocess
import shutil
import yaml
from pathlib import Path
from cvat_dataset import CVATDataset
from merge_cocos import merge
from yolo_labels import get_yolo_labels
# Base directory of the application, read from the APP_HOME environment
# variable.
HOME = os.getenv("APP_HOME")
# Path of the YAML file opened by the main block; the APP_CVAT_TASKS_YAML
# value is joined onto HOME.
# NOTE(review): os.getenv returns None for an unset variable, in which case
# os.path.join raises TypeError at import time - confirm both variables are
# always exported by whatever launches this script.
CVAT_TASKS = os.path.join(HOME, os.getenv("APP_CVAT_TASKS_YAML"))
# Raw string flag; the main block runs the project-specific preprocessing
# step only when this is exactly 'true'.
PYPREPROCESS = os.getenv("APP_PYPREPROCESS")
import sys
# Put HOME on the module search path; presumably this is what lets the lazy
# `trainer_files.preprocess` import in the main block resolve - verify that
# the package lives under HOME.
sys.path.append(HOME)
def unpack_datasets(data_folder):
    """Locate every ``*.zip`` archive under *data_folder* and unpack it.

    Each archive ``<parent>/<stem>.zip`` is extracted with the external
    ``unzip`` tool into the sibling directory ``<parent>/<stem>``. Archives
    whose target directory already exists are not extracted again, but their
    paths are still reported.

    Args:
        data_folder: ``pathlib.Path`` that is searched recursively.

    Returns:
        A ``(dirs, images, jsons)`` tuple of three equally long lists. For
        every archive they hold the extraction directory, its ``images``
        sub-directory and its ``annotations/instances_default.json`` file.
        The paths are derived from the archive name only; their existence
        is not checked.
    """
    dirs, images, jsons = [], [], []
    for archive in data_folder.rglob("*.zip"):
        target = archive.parent / archive.stem
        dirs.append(target)
        images.append(target / "images")
        jsons.append(target / "annotations" / "instances_default.json")
        if target.exists():
            # Already unpacked (e.g. by a previous run): keep the paths,
            # skip the extraction.
            continue
        status = subprocess.call(['unzip', '-o', archive, '-d', target])
        if status != 0:
            # Report a failed or partial extraction instead of silently
            # ignoring it. Only a warning: unzip also uses a non-zero
            # status for recoverable warnings.
            print(
                f"warning: unzip exited with status {status} for {archive}",
                file=sys.stderr,
            )
    return dirs, images, jsons


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'cvat_url',
        type=str,
        help='cvat url',
    )
    parser.add_argument(
        'cvat_org',
        type=str,
        help='cvat organization',
    )
    parser.add_argument(
        '-odir', '--output_dir',
        type=str,
        help="path to download directory",
        default="/data",
    )
    args = parser.parse_args()

    # The YAML file lists the tasks to download and, optionally, names
    # that are passed through to CVATDataset.
    with open(CVAT_TASKS, "r") as f:
        y = yaml.safe_load(f)
    TASK_IDS = y["task_ids"]
    NAMES = y.get("names")

    data_folder = Path(args.output_dir)
    data_folder.mkdir(parents=True, exist_ok=True)

    CVAT = CVATDataset(
        args.cvat_url,
        args.cvat_org,
        TASK_IDS,
        names=NAMES,
        dest_folder=data_folder,
    )
    CVAT.download_tasks()

    paths2dirs, paths2imgs, paths2json = unpack_datasets(data_folder)

    if PYPREPROCESS == 'true':
        # looks for the py script called: trainer_files/preprocess.py
        # this script is characteristic to the project
        from trainer_files.preprocess import preprocess_cvat
        paths2json, paths2imgs = preprocess_cvat(paths2dirs)
        # TODO: add debugging / assert script to make sure preprocess is done correctly

    if len(paths2json) == 0:
        # Without this guard the single-dataset branch below would fail
        # with an uninformative IndexError.
        sys.exit(
            f"No annotation files found under {data_folder}; nothing to merge."
        )

    # merge everything into a single json file
    if len(paths2json) > 1:
        merge(
            paths2json, paths2imgs, data_folder / 'merged_cocos', 'merged', verbose=True
        )
    else:
        # Single dataset: nothing to merge. Copy its annotation file to
        # <dataset dir>/merged.json, then move the whole dataset directory
        # to <data_folder>/merged_cocos.
        json_file = Path(paths2json[0])
        dataset_dir = json_file.parents[1]
        shutil.copy(
            json_file.as_posix(),
            (dataset_dir / 'merged.json').as_posix()
        )
        # NOTE(review): if merged_cocos already exists as a directory,
        # shutil.move puts the dataset *inside* it, so merged.json would not
        # be at the path used below - confirm how re-runs should behave.
        shutil.move(
            dataset_dir.as_posix(),
            (data_folder / 'merged_cocos').as_posix()
        )

    # yolo format - labels
    path2json = data_folder / 'merged_cocos' / 'merged.json'
    get_yolo_labels(path2json, use_segment=False)