sajabdoli committed on
Commit
472d78b
·
verified ·
1 Parent(s): 58f7d9b

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +71 -0
  2. config.json +1 -0
  3. inference.py +54 -0
  4. metadata.json +1 -0
  5. model.pth +3 -0
  6. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Detectron2 Instance Segmentation Model
2
+
3
+ This repository contains a Detectron2 model for instance segmentation. The model is a Mask R-CNN (Detectron2 `GeneralizedRCNN` meta-architecture) with a ResNet-50 FPN backbone, built by `build_resnet_fpn_backbone`.
4
+
5
+ ## Model Details
6
+
7
+ - **Architecture**: GeneralizedRCNN
8
+ - **Backbone**: ResNet-50 with FPN (`build_resnet_fpn_backbone`)
9
+ - **Classes**: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
10
+ - **Training Dataset**: coco_2017_train
11
+
12
+ ## Usage with Detectron2
13
+
14
+ ```python
15
import detectron2
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
import torch
import json

# Set up configuration
cfg = get_cfg()
with open("config.json", "r") as f:
    # config.json stores the YAML config dump as a single JSON string,
    # so json.load returns a str rather than a dict.
    cfg_yaml = json.load(f)
# CfgNode has no merge_from_dict; parse the YAML text and merge the result.
cfg.merge_from_other_cfg(cfg.load_cfg(cfg_yaml))

# Use the local checkpoint; otherwise DefaultPredictor would load the
# remote weights URL recorded in the config.
cfg.MODEL.WEIGHTS = "model.pth"
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Build model
model = build_model(cfg)

# Load weights
checkpointer = DetectionCheckpointer(model)
checkpointer.load("model.pth")

# Set model to evaluation mode
model.eval()

# For inference (DefaultPredictor builds its own model from cfg and loads
# cfg.MODEL.WEIGHTS, so the manual steps above are optional)
from detectron2.engine import DefaultPredictor
predictor = DefaultPredictor(cfg)

# Load an image (cv2.imread returns None if the file cannot be read)
import cv2
image = cv2.imread("your_image.jpg")
if image is None:
    raise FileNotFoundError("Could not read image: your_image.jpg")
outputs = predictor(image)
46
+ ```
47
+
48
+ ## Sample Visualization Code
49
+
50
+ ```python
51
# NOTE: this snippet continues the usage example: it expects `image` (the
# BGR array from cv2.imread) and `outputs` (the predictor result) to exist.
import json

from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import cv2

# Load class metadata
with open("metadata.json", "r") as f:
    metadata_dict = json.load(f)

# Create metadata
metadata = MetadataCatalog.get("inference")
metadata.thing_classes = metadata_dict["thing_classes"]

# Visualize predictions (Visualizer expects RGB, OpenCV images are BGR)
v = Visualizer(image[:, :, ::-1], metadata=metadata, scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
# Convert back to BGR before writing with OpenCV
cv2.imwrite("output.jpg", out.get_image()[:, :, ::-1])
67
+ ```
68
+
69
+ ## Model Card for sajabdoli/detectron2-instance-segmentation
70
+
71
+ This model is a Detectron2 implementation of instance segmentation. It can detect and segment objects in images.
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "CUDNN_BENCHMARK: false\nDATALOADER:\n ASPECT_RATIO_GROUPING: true\n FILTER_EMPTY_ANNOTATIONS: true\n NUM_WORKERS: 4\n REPEAT_SQRT: true\n REPEAT_THRESHOLD: 0.0\n SAMPLER_TRAIN: TrainingSampler\nDATASETS:\n PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000\n PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000\n PROPOSAL_FILES_TEST: []\n PROPOSAL_FILES_TRAIN: []\n TEST:\n - coco_2017_val\n TRAIN:\n - coco_2017_train\nFLOAT32_PRECISION: ''\nGLOBAL:\n HACK: 1.0\nINPUT:\n CROP:\n ENABLED: false\n SIZE:\n - 0.9\n - 0.9\n TYPE: relative_range\n FORMAT: BGR\n MASK_FORMAT: polygon\n MAX_SIZE_TEST: 1333\n MAX_SIZE_TRAIN: 1333\n MIN_SIZE_TEST: 800\n MIN_SIZE_TRAIN:\n - 640\n - 672\n - 704\n - 736\n - 768\n - 800\n MIN_SIZE_TRAIN_SAMPLING: choice\n RANDOM_FLIP: horizontal\nMODEL:\n ANCHOR_GENERATOR:\n ANGLES:\n - - -90\n - 0\n - 90\n ASPECT_RATIOS:\n - - 0.5\n - 1.0\n - 2.0\n NAME: DefaultAnchorGenerator\n OFFSET: 0.0\n SIZES:\n - - 32\n - - 64\n - - 128\n - - 256\n - - 512\n BACKBONE:\n FREEZE_AT: 2\n NAME: build_resnet_fpn_backbone\n DEVICE: cpu\n FPN:\n FUSE_TYPE: sum\n IN_FEATURES:\n - res2\n - res3\n - res4\n - res5\n NORM: ''\n OUT_CHANNELS: 256\n KEYPOINT_ON: false\n LOAD_PROPOSALS: false\n MASK_ON: true\n META_ARCHITECTURE: GeneralizedRCNN\n PANOPTIC_FPN:\n COMBINE:\n ENABLED: true\n INSTANCES_CONFIDENCE_THRESH: 0.5\n OVERLAP_THRESH: 0.5\n STUFF_AREA_LIMIT: 4096\n INSTANCE_LOSS_WEIGHT: 1.0\n PIXEL_MEAN:\n - 103.53\n - 116.28\n - 123.675\n PIXEL_STD:\n - 1.0\n - 1.0\n - 1.0\n PROPOSAL_GENERATOR:\n MIN_SIZE: 0\n NAME: RPN\n RESNETS:\n DEFORM_MODULATED: false\n DEFORM_NUM_GROUPS: 1\n DEFORM_ON_PER_STAGE:\n - false\n - false\n - false\n - false\n DEPTH: 50\n NORM: FrozenBN\n NUM_GROUPS: 1\n OUT_FEATURES:\n - res2\n - res3\n - res4\n - res5\n RES2_OUT_CHANNELS: 256\n RES5_DILATION: 1\n STEM_OUT_CHANNELS: 64\n STRIDE_IN_1X1: true\n WIDTH_PER_GROUP: 64\n RETINANET:\n BBOX_REG_LOSS_TYPE: smooth_l1\n BBOX_REG_WEIGHTS: &id002\n - 1.0\n - 1.0\n - 1.0\n - 1.0\n FOCAL_LOSS_ALPHA: 0.25\n FOCAL_LOSS_GAMMA: 
2.0\n IN_FEATURES:\n - p3\n - p4\n - p5\n - p6\n - p7\n IOU_LABELS:\n - 0\n - -1\n - 1\n IOU_THRESHOLDS:\n - 0.4\n - 0.5\n NMS_THRESH_TEST: 0.5\n NORM: ''\n NUM_CLASSES: 80\n NUM_CONVS: 4\n PRIOR_PROB: 0.01\n SCORE_THRESH_TEST: 0.05\n SMOOTH_L1_LOSS_BETA: 0.1\n TOPK_CANDIDATES_TEST: 1000\n ROI_BOX_CASCADE_HEAD:\n BBOX_REG_WEIGHTS:\n - &id001\n - 10.0\n - 10.0\n - 5.0\n - 5.0\n - - 20.0\n - 20.0\n - 10.0\n - 10.0\n - - 30.0\n - 30.0\n - 15.0\n - 15.0\n IOUS:\n - 0.5\n - 0.6\n - 0.7\n ROI_BOX_HEAD:\n BBOX_REG_LOSS_TYPE: smooth_l1\n BBOX_REG_LOSS_WEIGHT: 1.0\n BBOX_REG_WEIGHTS: *id001\n CLS_AGNOSTIC_BBOX_REG: false\n CONV_DIM: 256\n FC_DIM: 1024\n FED_LOSS_FREQ_WEIGHT_POWER: 0.5\n FED_LOSS_NUM_CLASSES: 50\n NAME: FastRCNNConvFCHead\n NORM: ''\n NUM_CONV: 0\n NUM_FC: 2\n POOLER_RESOLUTION: 7\n POOLER_SAMPLING_RATIO: 0\n POOLER_TYPE: ROIAlignV2\n SMOOTH_L1_BETA: 0.0\n TRAIN_ON_PRED_BOXES: false\n USE_FED_LOSS: false\n USE_SIGMOID_CE: false\n ROI_HEADS:\n BATCH_SIZE_PER_IMAGE: 512\n IN_FEATURES:\n - p2\n - p3\n - p4\n - p5\n IOU_LABELS:\n - 0\n - 1\n IOU_THRESHOLDS:\n - 0.5\n NAME: StandardROIHeads\n NMS_THRESH_TEST: 0.5\n NUM_CLASSES: 80\n POSITIVE_FRACTION: 0.25\n PROPOSAL_APPEND_GT: true\n SCORE_THRESH_TEST: 0.5\n ROI_KEYPOINT_HEAD:\n CONV_DIMS:\n - 512\n - 512\n - 512\n - 512\n - 512\n - 512\n - 512\n - 512\n LOSS_WEIGHT: 1.0\n MIN_KEYPOINTS_PER_IMAGE: 1\n NAME: KRCNNConvDeconvUpsampleHead\n NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true\n NUM_KEYPOINTS: 17\n POOLER_RESOLUTION: 14\n POOLER_SAMPLING_RATIO: 0\n POOLER_TYPE: ROIAlignV2\n ROI_MASK_HEAD:\n CLS_AGNOSTIC_MASK: false\n CONV_DIM: 256\n NAME: MaskRCNNConvUpsampleHead\n NORM: ''\n NUM_CONV: 4\n POOLER_RESOLUTION: 14\n POOLER_SAMPLING_RATIO: 0\n POOLER_TYPE: ROIAlignV2\n RPN:\n BATCH_SIZE_PER_IMAGE: 256\n BBOX_REG_LOSS_TYPE: smooth_l1\n BBOX_REG_LOSS_WEIGHT: 1.0\n BBOX_REG_WEIGHTS: *id002\n BOUNDARY_THRESH: -1\n CONV_DIMS:\n - -1\n HEAD_NAME: StandardRPNHead\n IN_FEATURES:\n - p2\n - p3\n - p4\n - p5\n - p6\n 
IOU_LABELS:\n - 0\n - -1\n - 1\n IOU_THRESHOLDS:\n - 0.3\n - 0.7\n LOSS_WEIGHT: 1.0\n NMS_THRESH: 0.7\n POSITIVE_FRACTION: 0.5\n POST_NMS_TOPK_TEST: 1000\n POST_NMS_TOPK_TRAIN: 1000\n PRE_NMS_TOPK_TEST: 1000\n PRE_NMS_TOPK_TRAIN: 2000\n SMOOTH_L1_BETA: 0.0\n SEM_SEG_HEAD:\n COMMON_STRIDE: 4\n CONVS_DIM: 128\n IGNORE_VALUE: 255\n IN_FEATURES:\n - p2\n - p3\n - p4\n - p5\n LOSS_WEIGHT: 1.0\n NAME: SemSegFPNHead\n NORM: GN\n NUM_CLASSES: 54\n WEIGHTS: https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl\nOUTPUT_DIR: ./output\nSEED: -1\nSOLVER:\n AMP:\n ENABLED: false\n BASE_LR: 0.02\n BASE_LR_END: 0.0\n BIAS_LR_FACTOR: 1.0\n CHECKPOINT_PERIOD: 5000\n CLIP_GRADIENTS:\n CLIP_TYPE: value\n CLIP_VALUE: 1.0\n ENABLED: false\n NORM_TYPE: 2.0\n GAMMA: 0.1\n IMS_PER_BATCH: 16\n LR_SCHEDULER_NAME: WarmupMultiStepLR\n MAX_ITER: 270000\n MOMENTUM: 0.9\n NESTEROV: false\n NUM_DECAYS: 3\n REFERENCE_WORLD_SIZE: 0\n RESCALE_INTERVAL: false\n STEPS:\n - 210000\n - 250000\n WARMUP_FACTOR: 0.001\n WARMUP_ITERS: 1000\n WARMUP_METHOD: linear\n WEIGHT_DECAY: 0.0001\n WEIGHT_DECAY_BIAS: null\n WEIGHT_DECAY_NORM: 0.0\nTEST:\n AUG:\n ENABLED: false\n FLIP: true\n MAX_SIZE: 4000\n MIN_SIZES:\n - 400\n - 500\n - 600\n - 700\n - 800\n - 900\n - 1000\n - 1100\n - 1200\n DETECTIONS_PER_IMAGE: 100\n EVAL_PERIOD: 0\n EXPECTED_RESULTS: []\n KEYPOINT_OKS_SIGMAS: []\n PRECISE_BN:\n ENABLED: false\n NUM_ITER: 200\nVERSION: 2\nVIS_PERIOD: 0\n"
inference.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import detectron2
4
+ from detectron2.config import get_cfg
5
+ from detectron2.engine import DefaultPredictor
6
+ from detectron2.utils.visualizer import Visualizer
7
+ from detectron2.data import MetadataCatalog
8
+ import cv2
9
+ import json
10
+ import argparse
11
+
12
def main():
    """Run instance segmentation on one image and save the visualization.

    Command-line arguments:
        --image: path to the input image (required).
        --output: path of the rendered output image (default ``output.jpg``).

    Reads ``config.json``, ``model.pth`` and ``metadata.json`` from the
    current working directory.

    Raises:
        OSError: if the output image cannot be written.
    """
    parser = argparse.ArgumentParser(description="Run inference with Detectron2 model")
    parser.add_argument("--image", required=True, help="Path to input image")
    parser.add_argument("--output", default="output.jpg", help="Path to output image")
    args = parser.parse_args()

    # Load image first so a bad path fails before the model is built.
    # cv2.imread returns None instead of raising when the file is unreadable.
    image = cv2.imread(args.image)
    if image is None:
        parser.error(f"Could not read image: {args.image}")

    # Load config
    cfg = get_cfg()
    with open("config.json", "r") as f:
        cfg_payload = json.load(f)
    # CfgNode has no ``merge_from_dict``. config.json holds the YAML config
    # dump wrapped in a JSON string, so parse that text; a plain JSON object
    # is also accepted for convenience.
    if isinstance(cfg_payload, str):
        loaded_cfg = cfg.load_cfg(cfg_payload)
    else:
        loaded_cfg = type(cfg)(cfg_payload)
    cfg.merge_from_other_cfg(loaded_cfg)

    # Update inference parameters
    cfg.MODEL.WEIGHTS = "model.pth"
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    # Create predictor
    predictor = DefaultPredictor(cfg)

    # Run prediction
    outputs = predictor(image)

    # Load metadata
    with open("metadata.json", "r") as f:
        metadata_dict = json.load(f)

    # Setup metadata
    metadata = MetadataCatalog.get("inference")
    metadata.thing_classes = metadata_dict["thing_classes"]

    # Visualize (Visualizer expects RGB, OpenCV images are BGR)
    v = Visualizer(image[:, :, ::-1], metadata=metadata, scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    # Save output; cv2.imwrite signals failure by returning False
    if not cv2.imwrite(args.output, out.get_image()[:, :, ::-1]):
        raise OSError(f"Could not write output image: {args.output}")
    print(f"Saved output to {args.output}")


if __name__ == "__main__":
    main()
metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"thing_classes": ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]}
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b990412f04e124ef3a22aad4604870357cbaa5453f10c090fd7666222448041
3
+ size 177890206
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+
2
+ torch>=1.7.0
3
+ detectron2
4
+ numpy
5
+ opencv-python