Spaces:
Sleeping
Sleeping
| /* | |
| * SPDX-License-Identifier: Apache-2.0 | |
| */ | |
| using namespace ONNX_NAMESPACE; | |
| namespace ONNX_NAMESPACE { | |
// Documentation text for the RoiAlign operator, version 16. The schema
// registration that follows hands this pointer to OpSchema::SetDoc().
// The text is a raw string literal (delimiter DOC), so every character between
// the delimiters, including line breaks, is part of the string.
// NOTE(review): the leading "| " and trailing " | |" on each line look like
// table-markup residue from extraction rather than intended content -- confirm
// against the upstream file before relying on the literal's exact bytes.
// NOTE(review): "the value of the sampled locations are computed" has a
// subject/verb mismatch; left untouched here because the text is runtime data.
| static const char* RoiAlign_ver16_doc = R"DOC( | |
| Region of Interest (RoI) align operation described in the | |
| [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). | |
| RoiAlign consumes an input tensor X and region of interests (rois) | |
| to apply pooling across each RoI; it produces a 4-D tensor of shape | |
| (num_rois, C, output_height, output_width). | |
| RoiAlign is proposed to avoid the misalignment by removing | |
| quantizations while converting from original image into feature | |
| map and from feature map into RoI feature; in each ROI bin, | |
| the value of the sampled locations are computed directly | |
| through bilinear interpolation. | |
| )DOC"; | |
| ONNX_OPERATOR_SET_SCHEMA( | |
| RoiAlign, | |
| 16, | |
| OpSchema() | |
| .SetDoc(RoiAlign_ver16_doc) | |
| .Attr( | |
| "spatial_scale", | |
| "Multiplicative spatial scale factor to translate ROI coordinates " | |
| "from their input spatial scale to the scale used when pooling, " | |
| "i.e., spatial scale of the input feature map X relative to the " | |
| "input image. E.g.; default is 1.0f. ", | |
| AttributeProto::FLOAT, | |
| 1.f) | |
| .Attr("output_height", "default 1; Pooled output Y's height.", AttributeProto::INT, static_cast<int64_t>(1)) | |
| .Attr("output_width", "default 1; Pooled output Y's width.", AttributeProto::INT, static_cast<int64_t>(1)) | |
| .Attr( | |
| "sampling_ratio", | |
| "Number of sampling points in the interpolation grid used to compute " | |
| "the output value of each pooled output bin. If > 0, then exactly " | |
| "sampling_ratio x sampling_ratio grid points are used. If == 0, then " | |
| "an adaptive number of grid points are used (computed as " | |
| "ceil(roi_width / output_width), and likewise for height). Default is 0.", | |
| AttributeProto::INT, | |
| static_cast<int64_t>(0)) | |
| .Attr( | |
| "mode", | |
| "The pooling method. Two modes are supported: 'avg' and 'max'. " | |
| "Default is 'avg'.", | |
| AttributeProto::STRING, | |
| std::string("avg")) | |
| .Attr( | |
| "coordinate_transformation_mode", | |
| "Allowed values are 'half_pixel' and 'output_half_pixel'. " | |
| "Use the value 'half_pixel' to pixel shift the input coordinates by -0.5 (the recommended behavior). " | |
| "Use the value 'output_half_pixel' to omit the pixel shift for the input (use this for a " | |
| "backward-compatible behavior).", | |
| AttributeProto::STRING, | |
| std::string("half_pixel")) | |
| .Input( | |
| 0, | |
| "X", | |
| "Input data tensor from the previous operator; " | |
| "4-D feature map of shape (N, C, H, W), " | |
| "where N is the batch size, C is the number of channels, " | |
| "and H and W are the height and the width of the data.", | |
| "T1") | |
| .Input( | |
| 1, | |
| "rois", | |
| "RoIs (Regions of Interest) to pool over; rois is " | |
| "2-D input of shape (num_rois, 4) given as " | |
| "[[x1, y1, x2, y2], ...]. " | |
| "The RoIs' coordinates are in the coordinate system of the input image. " | |
| "Each coordinate set has a 1:1 correspondence with the 'batch_indices' input.", | |
| "T1") | |
| .Input( | |
| 2, | |
| "batch_indices", | |
| "1-D tensor of shape (num_rois,) with each element denoting " | |
| "the index of the corresponding image in the batch.", | |
| "T2") | |
| .Output( | |
| 0, | |
| "Y", | |
| "RoI pooled output, 4-D tensor of shape " | |
| "(num_rois, C, output_height, output_width). The r-th batch element Y[r-1] " | |
| "is a pooled feature map corresponding to the r-th RoI X[r-1].", | |
| "T1") | |
| .TypeConstraint( | |
| "T1", | |
| {"tensor(float16)", "tensor(float)", "tensor(double)"}, | |
| "Constrain types to float tensors.") | |
| .TypeConstraint("T2", {"tensor(int64)"}, "Constrain types to int tensors.") | |
| .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
| propagateElemTypeFromInputToOutput(ctx, 0, 0); | |
| size_t input_param = 0, rois_param = 1, batch_index_param = 2; | |
| checkInputRank(ctx, input_param, 4); | |
| checkInputRank(ctx, rois_param, 2); | |
| checkInputRank(ctx, batch_index_param, 1); | |
| // Output dimensions, initialized to an unknown-dimension-value | |
| Dim num_rois, C, ht, width; | |
| // Get value of C from dim 1 of input_param, if available | |
| unifyInputDim(ctx, input_param, 1, C); | |
| // Get value of num_rois from dim 0 of rois_param, if available | |
| unifyInputDim(ctx, rois_param, 0, num_rois); | |
| // ... or from dim 0 of batch_index_param, if available | |
| unifyInputDim(ctx, batch_index_param, 0, num_rois); | |
| // Get height from attribute, using default-value of 1 | |
| unifyDim(ht, getAttribute(ctx, "output_height", 1)); | |
| // Get width from attribute, using default-value of 1 | |
| unifyDim(width, getAttribute(ctx, "output_width", 1)); | |
| // set output shape: | |
| updateOutputShape(ctx, 0, {num_rois, C, ht, width}); | |
| })); | |
// Documentation text for the NonMaxSuppression operator, version 11. The
// schema registration that follows hands this pointer to OpSchema::SetDoc().
// The text is a raw string literal (delimiter DOC), so every character between
// the delimiters, including line breaks, is part of the string.
// NOTE(review): the leading "| " and trailing " | |" on each line look like
// table-markup residue from extraction rather than intended content -- confirm
// against the upstream file before relying on the literal's exact bytes.
// NOTE(review): "translating or reflections of the coordinate system" mixes
// word forms; left untouched here because the text is runtime data.
| static const char* NonMaxSuppression_ver11_doc = R"DOC( | |
| Filter out boxes that have high intersection-over-union (IOU) overlap with previously selected boxes. | |
| Bounding boxes with score less than score_threshold are removed. Bounding box format is indicated by attribute center_point_box. | |
| Note that this algorithm is agnostic to where the origin is in the coordinate system and more generally is invariant to | |
| orthogonal transformations and translations of the coordinate system; thus translating or reflections of the coordinate system | |
| result in the same boxes being selected by the algorithm. | |
| The selected_indices output is a set of integers indexing into the input collection of bounding boxes representing the selected boxes. | |
| The bounding box coordinates corresponding to the selected indices can then be obtained using the Gather or GatherND operation. | |
| )DOC"; | |
| ONNX_OPERATOR_SET_SCHEMA( | |
| NonMaxSuppression, | |
| 11, | |
| OpSchema() | |
| .Input( | |
| 0, | |
| "boxes", | |
| "An input tensor with shape [num_batches, spatial_dimension, 4]. The single box data format is indicated by center_point_box.", | |
| "tensor(float)") | |
| .Input(1, "scores", "An input tensor with shape [num_batches, num_classes, spatial_dimension]", "tensor(float)") | |
| .Input( | |
| 2, | |
| "max_output_boxes_per_class", | |
| "Integer representing the maximum number of boxes to be selected per batch per class. It is a scalar. Default to 0, which means no output.", | |
| "tensor(int64)", | |
| OpSchema::Optional) | |
| .Input( | |
| 3, | |
| "iou_threshold", | |
| "Float representing the threshold for deciding whether boxes overlap too much with respect to IOU. It is scalar. Value range [0, 1]. Default to 0.", | |
| "tensor(float)", | |
| OpSchema::Optional) | |
| .Input( | |
| 4, | |
| "score_threshold", | |
| "Float representing the threshold for deciding when to remove boxes based on score. It is a scalar.", | |
| "tensor(float)", | |
| OpSchema::Optional) | |
| .Output( | |
| 0, | |
| "selected_indices", | |
| "selected indices from the boxes tensor. [num_selected_indices, 3], the selected index format is [batch_index, class_index, box_index].", | |
| "tensor(int64)") | |
| .Attr( | |
| "center_point_box", | |
| "Integer indicate the format of the box data. The default is 0. " | |
| "0 - the box data is supplied as [y1, x1, y2, x2] where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair of box corners " | |
| "and the coordinates can be provided as normalized (i.e., lying in the interval [0, 1]) or absolute. Mostly used for TF models. " | |
| "1 - the box data is supplied as [x_center, y_center, width, height]. Mostly used for Pytorch models.", | |
| AttributeProto::INT, | |
| static_cast<int64_t>(0)) | |
| .SetDoc(NonMaxSuppression_ver11_doc) | |
| .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
| // Type inference - Output is always of type INT64 | |
| auto* selected_indices_type = ctx.getOutputType(0)->mutable_tensor_type(); | |
| selected_indices_type->set_elem_type(TensorProto_DataType::TensorProto_DataType_INT64); | |
| // Shape inference | |
| // The exact shape cannot be determined as it depends on the input and | |
| // other input configurations for the op But part of the shape can be | |
| // established | |
| auto* selected_indices_shape = getOutputShape(ctx, 0); | |
| selected_indices_shape->clear_dim(); | |
| // Output is 2D always | |
| // The value of the first dim is determined by input data | |
| // hence its value cannot be determined statically | |
| selected_indices_shape->add_dim(); | |
| // The value of the second dim is 3 | |
| selected_indices_shape->add_dim()->set_dim_value(3); | |
| })); | |
| } // namespace ONNX_NAMESPACE | |