Upload 10 files
Browse files
- configs/dataset/coco_detection.yml +38 -0
- configs/rtdetr/include/dataloader.yml +39 -0
- configs/rtdetr/include/optimizer.yml +36 -0
- configs/rtdetr/include/rtdetr_r50vd.yml +81 -0
- configs/rtdetr/rtdetr_r101vd_6x_coco.yml +28 -0
- configs/rtdetr/rtdetr_r18vd_6x_coco.yml +49 -0
- configs/rtdetr/rtdetr_r34vd_6x_coco.yml +48 -0
- configs/rtdetr/rtdetr_r50vd_6x_coco.yml +9 -0
- configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml +16 -0
- configs/runtime.yml +17 -0
configs/dataset/coco_detection.yml
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
task: detection
|
2 |
+
|
3 |
+
num_classes: 9
|
4 |
+
remap_mscoco_category: False
|
5 |
+
|
6 |
+
train_dataloader:
|
7 |
+
type: DataLoader
|
8 |
+
dataset:
|
9 |
+
type: CocoDetection
|
10 |
+
img_folder: ../../AITOD/train/images/
|
11 |
+
# img_folder: /data/student1/keshe/RT-DETR/rtdetr_pytorch/sahi/sliced/aitod_val_v1_images_400_025/
|
12 |
+
ann_file: ../../AITOD/annotations/aitod_train_v1.json
|
13 |
+
#ann_file: /data/student1/keshe/RT-DETR/rtdetr_pytorch/sahi/sliced/aitod_val_v1_400_025.json
|
14 |
+
transforms:
|
15 |
+
type: Compose
|
16 |
+
ops: ~
|
17 |
+
shuffle: True
|
18 |
+
batch_size: 4
|
19 |
+
num_workers: 1
|
20 |
+
drop_last: True
|
21 |
+
|
22 |
+
|
23 |
+
val_dataloader:
|
24 |
+
type: DataLoader
|
25 |
+
dataset:
|
26 |
+
type: CocoDetection
|
27 |
+
img_folder: ../../AITOD/val/images/
|
28 |
+
#img_folder: /data/student1/keshe/RT-DETR/rtdetr_pytorch/sahi/sliced/aitod_val_v1_images_400_025/
|
29 |
+
ann_file: ../../AITOD/annotations/aitod_val_v1.json
|
30 |
+
#ann_file: /data/student1/keshe/RT-DETR/rtdetr_pytorch/sahi/sliced/aitod_val_v1_400_025.json
|
31 |
+
transforms:
|
32 |
+
type: Compose
|
33 |
+
ops: ~
|
34 |
+
|
35 |
+
shuffle: False
|
36 |
+
batch_size: 1
|
37 |
+
num_workers: 1
|
38 |
+
drop_last: False
|
configs/rtdetr/include/dataloader.yml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# num_classes: 91
|
2 |
+
# remap_mscoco_category: True
|
3 |
+
|
4 |
+
train_dataloader:
|
5 |
+
dataset:
|
6 |
+
return_masks: False
|
7 |
+
transforms:
|
8 |
+
ops:
|
9 |
+
- {type: RandomPhotometricDistort, p: 0.5}
|
10 |
+
- {type: RandomZoomOut, fill: 0}
|
11 |
+
- {type: RandomIoUCrop, p: 0.8}
|
12 |
+
- {type: SanitizeBoundingBox, min_size: 1}
|
13 |
+
- {type: RandomHorizontalFlip}
|
14 |
+
- {type: Resize, size: [640, 640], }
|
15 |
+
# - {type: Resize, size: 639, max_size: 640}
|
16 |
+
# - {type: PadToSize, spatial_size: 640}
|
17 |
+
- {type: ToImageTensor}
|
18 |
+
- {type: ConvertDtype}
|
19 |
+
- {type: SanitizeBoundingBox, min_size: 1}
|
20 |
+
- {type: ConvertBox, out_fmt: 'cxcywh', normalize: True}
|
21 |
+
shuffle: True
|
22 |
+
batch_size: 2
|
23 |
+
num_workers: 0
|
24 |
+
collate_fn: default_collate_fn
|
25 |
+
|
26 |
+
|
27 |
+
val_dataloader:
|
28 |
+
dataset:
|
29 |
+
transforms:
|
30 |
+
ops:
|
31 |
+
# - {type: Resize, size: 639, max_size: 640}
|
32 |
+
# - {type: PadToSize, spatial_size: 640}
|
33 |
+
- {type: Resize, size: [640, 640]}
|
34 |
+
- {type: ToImageTensor}
|
35 |
+
- {type: ConvertDtype}
|
36 |
+
shuffle: False
|
37 |
+
batch_size: 2
|
38 |
+
num_workers: 0
|
39 |
+
collate_fn: default_collate_fn
|
configs/rtdetr/include/optimizer.yml
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
use_ema: True
|
3 |
+
ema:
|
4 |
+
type: ModelEMA
|
5 |
+
decay: 0.9999
|
6 |
+
warmups: 2000
|
7 |
+
|
8 |
+
|
9 |
+
find_unused_parameters: True
|
10 |
+
|
11 |
+
epoches: 72
|
12 |
+
clip_max_norm: 0.1
|
13 |
+
|
14 |
+
optimizer:
|
15 |
+
type: AdamW
|
16 |
+
params:
|
17 |
+
-
|
18 |
+
params: 'backbone'
|
19 |
+
lr: 0.00001
|
20 |
+
-
|
21 |
+
params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'
|
22 |
+
weight_decay: 0.
|
23 |
+
-
|
24 |
+
params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'
|
25 |
+
weight_decay: 0.
|
26 |
+
|
27 |
+
lr: 0.0001
|
28 |
+
betas: [0.9, 0.999]
|
29 |
+
weight_decay: 0.0001
|
30 |
+
|
31 |
+
|
32 |
+
lr_scheduler:
|
33 |
+
type: MultiStepLR
|
34 |
+
milestones: [1000]
|
35 |
+
gamma: 0.1
|
36 |
+
|
configs/rtdetr/include/rtdetr_r50vd.yml
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
task: detection
|
2 |
+
|
3 |
+
model: RTDETR
|
4 |
+
criterion: SetCriterion
|
5 |
+
postprocessor: RTDETRPostProcessor
|
6 |
+
|
7 |
+
|
8 |
+
RTDETR:
|
9 |
+
backbone: PResNet
|
10 |
+
encoder: HybridEncoder
|
11 |
+
decoder: RTDETRTransformer
|
12 |
+
multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
|
13 |
+
|
14 |
+
PResNet:
|
15 |
+
depth: 50
|
16 |
+
variant: d
|
17 |
+
freeze_at: 0
|
18 |
+
return_idx: [1, 2, 3]
|
19 |
+
num_stages: 4
|
20 |
+
freeze_norm: True
|
21 |
+
pretrained: True
|
22 |
+
|
23 |
+
HybridEncoder:
|
24 |
+
in_channels: [512, 1024, 2048]
|
25 |
+
feat_strides: [8, 16, 32]
|
26 |
+
|
27 |
+
# intra
|
28 |
+
hidden_dim: 256
|
29 |
+
use_encoder_idx: [2]
|
30 |
+
num_encoder_layers: 1
|
31 |
+
nhead: 8
|
32 |
+
dim_feedforward: 1024
|
33 |
+
dropout: 0.
|
34 |
+
enc_act: 'gelu'
|
35 |
+
pe_temperature: 10000
|
36 |
+
|
37 |
+
# cross
|
38 |
+
expansion: 1.0
|
39 |
+
depth_mult: 1
|
40 |
+
act: 'silu'
|
41 |
+
|
42 |
+
# eval
|
43 |
+
eval_spatial_size: [640, 640]
|
44 |
+
|
45 |
+
|
46 |
+
RTDETRTransformer:
|
47 |
+
feat_channels: [256, 256, 256]
|
48 |
+
feat_strides: [8, 16, 32]
|
49 |
+
hidden_dim: 256
|
50 |
+
num_levels: 3
|
51 |
+
|
52 |
+
num_queries: 300
|
53 |
+
|
54 |
+
num_decoder_layers: 6
|
55 |
+
num_denoising: 100
|
56 |
+
|
57 |
+
eval_idx: -1
|
58 |
+
eval_spatial_size: [640, 640]
|
59 |
+
|
60 |
+
|
61 |
+
use_focal_loss: True
|
62 |
+
|
63 |
+
RTDETRPostProcessor:
|
64 |
+
num_top_queries: 300
|
65 |
+
|
66 |
+
|
67 |
+
SetCriterion:
|
68 |
+
weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
|
69 |
+
losses: ['vfl', 'boxes', ]
|
70 |
+
alpha: 0.75
|
71 |
+
gamma: 2.0
|
72 |
+
|
73 |
+
matcher:
|
74 |
+
type: HungarianMatcher
|
75 |
+
weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
|
76 |
+
# use_focal_loss: True
|
77 |
+
alpha: 0.25
|
78 |
+
gamma: 2.0
|
79 |
+
|
80 |
+
|
81 |
+
|
configs/rtdetr/rtdetr_r101vd_6x_coco.yml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__include__: [
|
2 |
+
'../dataset/coco_detection.yml',
|
3 |
+
'../runtime.yml',
|
4 |
+
'./include/dataloader.yml',
|
5 |
+
'./include/optimizer.yml',
|
6 |
+
'./include/rtdetr_r50vd.yml',
|
7 |
+
]
|
8 |
+
|
9 |
+
PResNet:
|
10 |
+
depth: 101
|
11 |
+
|
12 |
+
|
13 |
+
HybridEncoder:
|
14 |
+
# intra
|
15 |
+
hidden_dim: 384
|
16 |
+
dim_feedforward: 2048
|
17 |
+
|
18 |
+
|
19 |
+
RTDETRTransformer:
|
20 |
+
feat_channels: [384, 384, 384]
|
21 |
+
|
22 |
+
|
23 |
+
optimizer:
|
24 |
+
type: AdamW
|
25 |
+
params:
|
26 |
+
-
|
27 |
+
params: 'backbone'
|
28 |
+
lr: 0.000001
|
configs/rtdetr/rtdetr_r18vd_6x_coco.yml
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
__include__: [
|
3 |
+
'../dataset/coco_detection.yml',
|
4 |
+
'../runtime.yml',
|
5 |
+
'./include/dataloader.yml',
|
6 |
+
'./include/optimizer.yml',
|
7 |
+
'./include/rtdetr_r50vd.yml',
|
8 |
+
]
|
9 |
+
|
10 |
+
|
11 |
+
output_dir: ./output/rtdetr_r18vd_6x_coco
|
12 |
+
|
13 |
+
PResNet:
|
14 |
+
depth: 18
|
15 |
+
freeze_at: -1
|
16 |
+
freeze_norm: False
|
17 |
+
pretrained: True
|
18 |
+
|
19 |
+
HybridEncoder:
|
20 |
+
in_channels: [128, 256, 512]
|
21 |
+
hidden_dim: 256
|
22 |
+
expansion: 0.5
|
23 |
+
|
24 |
+
|
25 |
+
RTDETRTransformer:
|
26 |
+
eval_idx: -1
|
27 |
+
num_decoder_layers: 3
|
28 |
+
num_denoising: 100
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
optimizer:
|
33 |
+
type: AdamW
|
34 |
+
params:
|
35 |
+
-
|
36 |
+
params: '^(?=.*backbone)(?=.*norm).*$'
|
37 |
+
lr: 0.00001
|
38 |
+
weight_decay: 0.
|
39 |
+
-
|
40 |
+
params: '^(?=.*backbone)(?!.*norm).*$'
|
41 |
+
lr: 0.00001
|
42 |
+
-
|
43 |
+
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
|
44 |
+
weight_decay: 0.
|
45 |
+
|
46 |
+
lr: 0.0001
|
47 |
+
betas: [0.9, 0.999]
|
48 |
+
weight_decay: 0.0001
|
49 |
+
|
configs/rtdetr/rtdetr_r34vd_6x_coco.yml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
__include__: [
|
3 |
+
'../dataset/coco_detection.yml',
|
4 |
+
'../runtime.yml',
|
5 |
+
'./include/dataloader.yml',
|
6 |
+
'./include/optimizer.yml',
|
7 |
+
'./include/rtdetr_r50vd.yml',
|
8 |
+
]
|
9 |
+
|
10 |
+
|
11 |
+
output_dir: ./output/rtdetr_r34vd_6x_coco
|
12 |
+
|
13 |
+
|
14 |
+
PResNet:
|
15 |
+
depth: 34
|
16 |
+
freeze_at: -1
|
17 |
+
freeze_norm: False
|
18 |
+
pretrained: True
|
19 |
+
|
20 |
+
|
21 |
+
HybridEncoder:
|
22 |
+
in_channels: [128, 256, 512]
|
23 |
+
hidden_dim: 256
|
24 |
+
expansion: 0.5
|
25 |
+
|
26 |
+
|
27 |
+
RTDETRTransformer:
|
28 |
+
num_decoder_layers: 4
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
optimizer:
|
33 |
+
type: AdamW
|
34 |
+
params:
|
35 |
+
-
|
36 |
+
params: '^(?=.*backbone)(?=.*norm|bn).*$'
|
37 |
+
weight_decay: 0.
|
38 |
+
lr: 0.00001
|
39 |
+
-
|
40 |
+
params: '^(?=.*backbone)(?!.*norm|bn).*$'
|
41 |
+
lr: 0.00001
|
42 |
+
-
|
43 |
+
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
|
44 |
+
weight_decay: 0.
|
45 |
+
|
46 |
+
lr: 0.0001
|
47 |
+
betas: [0.9, 0.999]
|
48 |
+
weight_decay: 0.0001
|
configs/rtdetr/rtdetr_r50vd_6x_coco.yml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__include__: [
|
2 |
+
'../dataset/coco_detection.yml',
|
3 |
+
'../runtime.yml',
|
4 |
+
'./include/dataloader.yml',
|
5 |
+
'./include/optimizer.yml',
|
6 |
+
'./include/rtdetr_r50vd.yml',
|
7 |
+
]
|
8 |
+
|
9 |
+
output_dir: ./output/rtdetr_r50vd_6x_coco
|
configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__include__: [
|
2 |
+
'../dataset/coco_detection.yml',
|
3 |
+
'../runtime.yml',
|
4 |
+
'./include/dataloader.yml',
|
5 |
+
'./include/optimizer.yml',
|
6 |
+
'./include/rtdetr_r50vd.yml',
|
7 |
+
]
|
8 |
+
|
9 |
+
output_dir: ./output/rtdetr_r50vd_m_6x_coco
|
10 |
+
|
11 |
+
|
12 |
+
HybridEncoder:
|
13 |
+
expansion: 0.5
|
14 |
+
|
15 |
+
RTDETRTransformer:
|
16 |
+
eval_idx: 2 # use the 3rd decoder layer for evaluation
|
configs/runtime.yml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sync_bn: True
|
2 |
+
find_unused_parameters: False
|
3 |
+
|
4 |
+
|
5 |
+
use_amp: False
|
6 |
+
|
7 |
+
scaler:
|
8 |
+
type: GradScaler
|
9 |
+
enabled: True
|
10 |
+
|
11 |
+
|
12 |
+
use_ema: False
|
13 |
+
ema:
|
14 |
+
type: ModelEMA
|
15 |
+
decay: 0.9999
|
16 |
+
warmups: 2000
|
17 |
+
|