Spaces:
Sleeping
Sleeping
/* | |
* SPDX-License-Identifier: Apache-2.0 | |
*/ | |
namespace ONNX_NAMESPACE { | |
// Doc string for Dropout-12: describes the optional ratio / training_mode
// inputs and the inverted-dropout output scaling (scale = 1 / (1 - ratio)).
static const char* Dropout_ver12_doc = R"DOC(
Dropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar). It produces two tensor outputs,
output (floating-point tensor) and mask (optional `Tensor<bool>`). If `training_mode` is true then the output Y will be a random dropout;
Note that this Dropout scales the masked input data by the following equation, so to convert the trained model into inference mode,
the user can simply not pass `training_mode` input or set it to false.
```
output = scale * data * mask,
```
where
```
scale = 1. / (1. - ratio).
```
)DOC";

// Opset-12 schema for Dropout: inputs (data, optional ratio, optional
// training_mode), outputs (output, optional boolean mask).
ONNX_OPERATOR_SET_SCHEMA(
    Dropout,
    12,
    OpSchema()
        .SetDoc(GET_OP_DOC_STR(std::string(Dropout_ver12_doc) + GenerateOptionalArgumentsDoc()))
        .Attr(
            "seed",
            "(Optional) Seed to the random generator, if not specified we will auto generate one.",
            AttributeProto::INT,
            OPTIONAL_VALUE)
        .Input(0, "data", "The input data as Tensor.", "T")
        .Input(
            1,
            "ratio",
            "The ratio of random dropout, with value in [0, 1). If this input was not set, "
            "or if it was set to 0, the output would be a simple copy of the input. "
            "If it's non-zero, output will be a random dropout of the scaled input, which is typically "
            "the case during training. It is an optional value, if not specified it will default to 0.5.",
            "T1",
            OpSchema::Optional)
        .Input(
            2,
            "training_mode",
            "If set to true then it indicates dropout is being used for training. It is an optional value hence unless "
            "specified explicitly, it is false. If it is false, ratio is ignored and the operation mimics inference mode where "
            "nothing will be dropped from the input data and if mask is requested as output it will contain all ones.",
            "T2",
            OpSchema::Optional)
        .Output(0, "output", "The output.", "T")
        .Output(1, "mask", "The output mask.", "T2", OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .TypeConstraint(
            "T1",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input 'ratio' types to float tensors.")
        .TypeConstraint("T2", {"tensor(bool)"}, "Constrain output 'mask' types to boolean tensors.")
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          // Output 0 mirrors the element type (and, when known, the shape)
          // of the data input.
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          if (hasInputShape(ctx, 0)) {
            propagateShapeFromInputToOutput(ctx, 0, 0);
          }
          // 'ratio' (input 1), when present with a known shape, must be a scalar.
          if (ctx.getNumInputs() > 1 && hasInputShape(ctx, 1)) {
            auto& ratio_input_shape = getInputShape(ctx, 1);
            if (static_cast<int>(ratio_input_shape.dim_size()) != 0) {
              fail_shape_inference("Ratio of Dropout must be a scalar.");
            }
          }
          // 'training_mode' (input 2), when present with a known shape, must be a scalar.
          if (ctx.getNumInputs() > 2 && hasInputShape(ctx, 2)) {
            auto& training_mode_input_shape = getInputShape(ctx, 2);
            if (static_cast<int>(training_mode_input_shape.dim_size()) != 0) {
              fail_shape_inference("training_mode of Dropout must be a scalar.");
            }
          }
          // The optional mask output is always boolean and, when the data
          // shape is known, shares the data input's shape.
          if (ctx.getNumOutputs() == 2) {
            updateOutputElemType(ctx, 1, TensorProto::BOOL);
            if (hasNInputShapes(ctx, 1)) {
              propagateShapeFromInputToOutput(ctx, 0, 1);
            }
          }
        }));
// Doc string shared by the opset-11 and opset-13 Flatten schemas.
static const char* Flatten_ver11_doc = R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn).
)DOC";

// Opset-13 schema for Flatten: widens the type constraint to
// all_tensor_types_ir4 and marks input/output as differentiable.
ONNX_OPERATOR_SET_SCHEMA(
    Flatten,
    13,
    OpSchema()
        .SetDoc(Flatten_ver11_doc)
        .Input(0, "input", "A tensor of rank >= axis.", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
        .Output(
            0,
            "output",
            "A 2D tensor with the contents of the input tensor, "
            "with input dimensions up to axis flattened to the outer dimension "
            "of the output and remaining input dimensions flattened into the inner "
            "dimension of the output.",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .TypeConstraint("T", OpSchema::all_tensor_types_ir4(), "Constrain input and output to all tensor types.")
        .Attr(
            "axis",
            "Indicate up to which input dimensions "
            "(exclusive) should be flattened to the outer dimension of the output. "
            "The value for axis must be in the range [-r, r], where r is the rank of the input tensor. "
            "Negative value means counting dimensions from the back. "
            "When axis = 0, the shape of the output tensor is (1, (d_0 X d_1 ... d_n), "
            "where the shape of the input tensor is (d_0, d_1, ... d_n). ",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          if (!hasInputShape(ctx, 0))
            return;
          auto& input_shape = getInputShape(ctx, 0);
          int rank = static_cast<int>(input_shape.dim_size());
          int axis = static_cast<int>(getAttribute(ctx, "axis", 1));
          // Negative axis counts from the back; after normalization the valid
          // range is [0, rank] (axis == rank flattens everything to dim 0).
          if (axis < 0) {
            axis += rank;
          }
          if (axis > rank || axis < 0) {
            fail_shape_inference("Invalid value(", axis, ") for attribute 'axis'");
          }
          // TODO: is the operation defined for input-rank < 2?
          // Output is 2-D: (prod of dims [0, axis), prod of dims [axis, rank)).
          updateOutputShape(ctx, 0, {multiplyDims(input_shape, 0, axis), multiplyDims(input_shape, axis, rank)});
        }));
// Opset-11 schema for Flatten: same inference logic as opset 13, but uses
// the older all_tensor_types constraint and no differentiability markers.
ONNX_OPERATOR_SET_SCHEMA(
    Flatten,
    11,
    OpSchema()
        .SetDoc(Flatten_ver11_doc)
        .Input(0, "input", "A tensor of rank >= axis.", "T")
        .Output(
            0,
            "output",
            "A 2D tensor with the contents of the input tensor, "
            "with input dimensions up to axis flattened to the outer dimension "
            "of the output and remaining input dimensions flattened into the inner "
            "dimension of the output.",
            "T")
        .TypeConstraint("T", OpSchema::all_tensor_types(), "Constrain input and output to all tensor types.")
        .Attr(
            "axis",
            "Indicate up to which input dimensions "
            "(exclusive) should be flattened to the outer dimension of the output. "
            "The value for axis must be in the range [-r, r], where r is the rank of the input tensor. "
            "Negative value means counting dimensions from the back. "
            "When axis = 0, the shape of the output tensor is (1, (d_0 X d_1 ... d_n), "
            "where the shape of the input tensor is (d_0, d_1, ... d_n). ",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          if (!hasInputShape(ctx, 0))
            return;
          auto& input_shape = getInputShape(ctx, 0);
          int rank = static_cast<int>(input_shape.dim_size());
          int axis = static_cast<int>(getAttribute(ctx, "axis", 1));
          // Negative axis counts from the back; after normalization the valid
          // range is [0, rank] (axis == rank flattens everything to dim 0).
          if (axis < 0) {
            axis += rank;
          }
          if (axis > rank || axis < 0) {
            fail_shape_inference("Invalid value(", axis, ") for attribute 'axis'");
          }
          // TODO: is the operation defined for input-rank < 2?
          // Output is 2-D: (prod of dims [0, axis), prod of dims [axis, rank)).
          updateOutputShape(ctx, 0, {multiplyDims(input_shape, 0, axis), multiplyDims(input_shape, axis, rank)});
        }));
// Doc string for LRN-1: cross-channel local response normalization
// (AlexNet-style), with the exact region and formula spelled out.
static const char* LRN_ver1_doc = R"DOC(
Local Response Normalization proposed in the [AlexNet paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf).
It normalizes over local input regions.
The local region is defined across the channels. For an element X[n, c, d1, ..., dk] in a tensor
of shape (N x C x D1 x D2, ..., Dk), its region is
{X[n, i, d1, ..., dk] | max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2))}.
square_sum[n, c, d1, ..., dk] = sum(X[n, i, d1, ..., dk] ^ 2),
where max(0, c - floor((size - 1) / 2)) <= i <= min(C - 1, c + ceil((size - 1) / 2)).
Y[n, c, d1, ..., dk] = X[n, c, d1, ..., dk] / (bias + alpha / size * square_sum[n, c, d1, ..., dk] ) ^ beta
)DOC";

// Opset-1 schema for LRN. Output has the same type and shape as the input,
// so inference simply propagates both from input 0.
ONNX_OPERATOR_SET_SCHEMA(
    LRN,
    1,
    OpSchema()
        .Attr("size", "The number of channels to sum over", AttributeProto::INT)
        .Attr("alpha", "Scaling parameter.", AttributeProto::FLOAT, 0.0001f)
        .Attr("beta", "The exponent.", AttributeProto::FLOAT, 0.75f)
        .Attr("bias", "", AttributeProto::FLOAT, 1.0f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the "
            "width of the data. For non image case, the "
            "dimensions are in the form of "
            "(N x C x D1 x D2 ... Dn), where N is the batch "
            "size. Optionally, if dimension denotation is "
            "in effect, the operation expects the input "
            "data tensor to arrive with the dimension denotation "
            "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
            "T")
        .Output(0, "Y", "Output tensor, which has the shape and type as input tensor", "T")
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output "
            " types to float tensors.")
        .SetDoc(LRN_ver1_doc)
        .TypeAndShapeInferenceFunction(propagateShapeAndTypeFromFirstInput));
// Doc string for MeanVarianceNormalization-9: Y = (X - E[X]) / sqrt(E[(X - E[X])^2]).
static const char* mvn_ver9_doc = R"DOC(
A MeanVarianceNormalization Function: Perform mean variance normalization
on the input tensor X using formula: <br/> ``` (X-EX)/sqrt(E(X-EX)^2) ```
)DOC";

// Default reduction axes [0, 2, 3]: per-channel statistics for NCHW input.
static const std::vector<int64_t> old_mvn_default_axes = {0, 2, 3};

// Opset-9 schema for MeanVarianceNormalization, defined as a function
// (decomposed into primitive ops) rather than a primitive itself.
ONNX_OPERATOR_SET_SCHEMA(
    MeanVarianceNormalization,
    9,
    OpSchema()
        .SetDoc(mvn_ver9_doc)
        .Input(0, "X", "Input tensor", "T")
        .Output(0, "Y", "Output tensor", "T")
        .Attr(
            "axes",
            "A list of integers, along which to reduce. The default is to "
            "calculate along axes [0,2,3] for calculating mean and variance "
            "along each channel. Two variables with the same C-coordinate "
            "are associated with the same mean and variance.",
            AttributeProto::INTS,
            old_mvn_default_axes)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to all numeric tensors.")
        // Function body computes:
        //   Variance = E[X^2] - (E[X])^2   (both means taken over 'axes')
        //   Y = (X - E[X]) / (sqrt(Variance) + epsilon)
        // where epsilon = 1e-9 guards against division by zero.
        .FunctionBody(FunctionBodyHelper::BuildNodes(
            {// nodes: {outputs, op, inputs, attributes}
             FunctionBodyHelper::Const<float>("Exponent", 2.0f),
             FunctionBodyHelper::Const<float>("Epsilon", float(1e-9)),
             {{"X_RM"}, "ReduceMean", {"X"}, {MakeRefAttribute("axes", AttributeProto::INTS)}},
             {{"EX_squared"}, "Pow", {"X_RM", "Exponent"}},
             {{"X_squared"}, "Pow", {"X", "Exponent"}},
             {{"E_Xsquared"}, "ReduceMean", {"X_squared"}, {MakeRefAttribute("axes", AttributeProto::INTS)}},
             {{"Variance"}, "Sub", {"E_Xsquared", "EX_squared"}},
             {{"STD"}, "Sqrt", {"Variance"}},
             {{"X_variance"}, "Sub", {"X", "X_RM"}},
             {{"Processed_STD"}, "Add", {"STD", "Epsilon"}},
             {{"Y"}, "Div", {"X_variance", "Processed_STD"}}})));
// Shared attribute doc text for the "pads" attribute of conv/pool schemas.
const char* pads_doc2 =
    "Padding for the beginning and ending along each spatial axis, it can take any value greater "
    "than or equal to 0. The value represent the number of pixels added to the beginning "
    "and end part of the corresponding axis. `pads` format should be as follow "
    "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin the number of pixels "
    "added at the beginning of axis `i` and xi_end, the number of pixels added at "
    "the end of axis `i`. This attribute cannot be used simultaneously with "
    "auto_pad attribute. If not present, the padding defaults to 0 along start and end of each spatial axis.";

// "auto_pad" doc text used by the older pooling schemas (PoolOpSchemaGenerator_9/_10).
const char* auto_pad_doc2 =
    "auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
    "default value is NOTSET, which means explicit padding is used. "
    "SAME_UPPER or SAME_LOWER mean pad the input so that the output spatial size match the input."
    "In case of odd number add the extra padding at the end for SAME_UPPER and at the "
    "beginning for SAME_LOWER. VALID mean no padding.";

// "auto_pad" doc text used by the opset-11 pooling schemas (PoolOpSchemaGenerator_11);
// states the output-size formula explicitly.
const char* auto_pad_doc3 =
    "auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
    "default value is NOTSET, which means explicit padding is used. "
    "SAME_UPPER or SAME_LOWER mean pad the input so that "
    "`output_shape[i] = ceil(input_shape[i] / strides[i])` for each axis `i`. "
    "The padding is split between the two sides equally or almost equally (depending "
    "on whether it is even or odd). In case the padding is an odd number, the extra "
    "padding is added at the end for SAME_UPPER and at the beginning for SAME_LOWER.";
// Shape inference shared by the (legacy) convolution and pooling schemas.
//
// ctx:                  inference context giving access to input/output types,
//                       shapes, and node attributes.
// use_dilation:         honor the "dilations" attribute (MaxPool/Conv); when
//                       false, dilations are treated as all 1s.
// require_kernel_shape: when true, "kernel_shape" must be an attribute
//                       (pooling ops); when false, the kernel extent is taken
//                       from the trailing dims of the second input (weights).
// input1Idx:            index of the data input, laid out (N x C x D1 ... Dn).
// input2Idx:            index of the weight input; only consulted when
//                       require_kernel_shape is false.
//
// On success, writes the inferred shape to output 0 (and copies it to output 1
// for the MaxPool-with-indices case). Returns early when required shapes are
// unknown; calls fail_shape_inference on inconsistent attributes.
void convPoolShapeInference1(
    InferenceContext& ctx,
    bool use_dilation,
    bool require_kernel_shape,
    int input1Idx,
    int input2Idx) {
  // we need the first input shape for this inference.
  if (!hasInputShape(ctx, input1Idx)) {
    return;
  }
  // if kernel shape is an input (and not attribute)
  // we need the shape of the second input.
  if (!require_kernel_shape && !hasInputShape(ctx, input2Idx)) {
    return;
  }
  // Bind by const reference: the shape is only read, no need to copy the proto.
  const auto& input_shape = ctx.getInputType(input1Idx)->tensor_type().shape();
  if (input_shape.dim_size() < 2) {
    fail_shape_inference("Input tensor must have at least 2 dimensions");
  }
  // first dim is the batch axis and the next is the number of channels.
  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
  // Only MaxPool and Conv support dilation. For
  // simplicity of the code, we just treat the rest of them as having all-1s
  // dilation.
  std::vector<int64_t> dilations;
  if (use_dilation && getRepeatedAttribute(ctx, "dilations", dilations)) {
    if (dilations.size() != n_input_dims) {
      fail_shape_inference("Attribute dilations has incorrect size");
    }
  } else {
    dilations.assign(n_input_dims, 1);
  }
  std::vector<int64_t> strides;
  if (getRepeatedAttribute(ctx, "strides", strides)) {
    if (strides.size() != n_input_dims) {
      fail_shape_inference("Attribute strides has incorrect size");
    }
  } else {
    strides.assign(n_input_dims, 1);
  }
  std::vector<int64_t> kernel_shape;
  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Attribute kernel_shape has incorrect size");
    }
  } else if (require_kernel_shape) {
    fail_shape_inference("Attribute kernel_shape must be specified");
  } else {
    // Derive the kernel extent from the spatial dims of the weight tensor.
    const auto& second_input_shape = ctx.getInputType(input2Idx)->tensor_type().shape();
    for (int i = 2; i < second_input_shape.dim_size(); ++i) {
      if (!second_input_shape.dim(i).has_dim_value()) {
        return;
      }
      kernel_shape.push_back(second_input_shape.dim(i).dim_value());
    }
  }
  std::vector<int64_t> effective_kernel_shape = kernel_shape;
  for (int i = 0; i < static_cast<int>(kernel_shape.size()); i++) {
    // accounting for dilation, how big is the kernel in this dimension
    effective_kernel_shape[i] = (effective_kernel_shape[i] - 1) * dilations[i] + 1;
  }
  std::vector<int64_t> pads;
  if (getRepeatedAttribute(ctx, "pads", pads)) {
    if (pads.size() != n_input_dims * 2) {
      fail_shape_inference("Attribute pads has incorrect size");
    }
  } else {
    // No explicit pads: default to zeros, then fill in SAME_UPPER/SAME_LOWER
    // padding from the auto_pad attribute when applicable.
    pads.assign(n_input_dims * 2, 0);
    const auto* auto_pad_attr = ctx.getAttribute("auto_pad");
    if ((nullptr != auto_pad_attr) && (auto_pad_attr->s() != "VALID")) {
      int input_dims_size = static_cast<int>(n_input_dims);
      for (int i = 0; i < input_dims_size; ++i) {
        int64_t residual = 0;
        int64_t stride = strides[i];
        if (stride > 1) {
          if (!input_shape.dim(2 + i).has_dim_value()) {
            continue;
          }
          // What is left of the input after the last full stride step;
          // equivalent to (and faster than) repeatedly subtracting stride.
          residual = input_shape.dim(2 + i).dim_value() % stride;
        }
        int64_t total_pad = residual == 0 ? effective_kernel_shape[i] - stride : effective_kernel_shape[i] - residual;
        if (total_pad < 0)
          total_pad = 0;
        // Split the padding across both ends; the bigger half goes to the end
        // for SAME_UPPER and to the beginning for SAME_LOWER.
        int64_t half_pad_small = total_pad >> 1;
        int64_t half_pad_big = total_pad - half_pad_small;
        if (auto_pad_attr->s() == "SAME_UPPER") {
          pads[i] = half_pad_small;
          pads[i + input_dims_size] = half_pad_big;
        } else if (auto_pad_attr->s() == "SAME_LOWER") {
          pads[i] = half_pad_big;
          pads[i + input_dims_size] = half_pad_small;
        }
      }
    }
  }
  auto output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
  if (require_kernel_shape) {
    // add the first two dimensions from the input.
    *output_shape->add_dim() = input_shape.dim(0);
    *output_shape->add_dim() = input_shape.dim(1);
  } else {
    // Convolution: batch dim from the data, channel dim from the weights.
    *output_shape->add_dim() = input_shape.dim(0);
    auto& second_input_shape = getInputShape(ctx, input2Idx);
    if (second_input_shape.dim_size() < 1) {
      fail_shape_inference("Second input tensor has wrong dimension");
    }
    *output_shape->add_dim() = second_input_shape.dim(0);
  }
  int kernel_shape_size = static_cast<int>(kernel_shape.size());
  for (int i = 0; i < kernel_shape_size; ++i) {
    auto newdim = output_shape->add_dim();
    if (!input_shape.dim(2 + i).has_dim_value()) {
      // Unknown input dim: leave the corresponding output dim unset.
      continue;
    }
    // how big is the input, including padding
    int64_t effective_input_size = input_shape.dim(2 + i).dim_value();
    effective_input_size += pads[i];
    effective_input_size += pads[i + kernel_shape_size];
    // default is floor mode .i.e. ceil_mode is set to 0
    auto ceil_mode = getAttribute(ctx, "ceil_mode", 0);
    // how many times we can move the kernel from it's initial position, based
    // on the stride
    int64_t strided_kernel_positions;
    if (ceil_mode == 1)
      strided_kernel_positions =
          static_cast<int64_t>(std::ceil((effective_input_size - effective_kernel_shape[i]) / float(strides[i])));
    else
      strided_kernel_positions = (effective_input_size - effective_kernel_shape[i]) / strides[i];
    // add in the initial position
    newdim->set_dim_value(1 + strided_kernel_positions);
  }
  if (ctx.getNumOutputs() > 1) {
    // MaxPool with two outputs case.
    auto second_output_shape = ctx.getOutputType(1)->mutable_tensor_type()->mutable_shape();
    second_output_shape->CopyFrom(*output_shape);
  }
}
// Builds the schema-filling functor shared by the opset-1/7 pooling ops
// (no ceil_mode attribute; uses the auto_pad_doc2 wording).
//
// name:                  operator name substituted into the doc ("AveragePool").
// opName:                pooling kind substituted into the doc ("average", "max").
// additionalDescription: op-specific trailing paragraph for the doc.
std::function<void(OpSchema&)>
PoolOpSchemaGenerator_9(const char* name, const char* opName, const char* additionalDescription) {
  // Capture the arguments by value; the functor is invoked later via FillUsing.
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(doc = R"DOC(
 {name} consumes an input tensor X and applies {opName} pooling across
 the tensor according to kernel sizes, stride sizes, and pad lengths.
 {opName} pooling consisting of computing the {opName} on all values of a
 subset of the input tensor according to the kernel size and downsampling the
 data into the output tensor Y for further processing. The output spatial shape will be following:
 ```
 output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - kernel_spatial_shape[i]) / strides_spatial_shape[i] + 1)
 * pad_shape[i] is sum of pads along axis i
 ```
 `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:
 ```
 VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - kernel_spatial_shape[i] + 1) / strides_spatial_shape[i])
 SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
 ```
 And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:
 ```
 pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + kernel_spatial_shape[i] - input_spatial_shape[i]
 ```
 {additionalDescription}
 )DOC";
                        // Substitute the caller-supplied fragments into the template.
                        ReplaceAll(doc, "{name}", name);
                        ReplaceAll(doc, "{opName}", opName);
                        ReplaceAll(doc, "{additionalDescription}", additionalDescription););
    schema.SetDoc(doc);
    schema.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS);
    schema.Attr("strides", "Stride along each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc2, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the batch "
        "size. Optionally, if dimension denotation is "
        "in effect, the operation expects the input "
        "data tensor to arrive with the dimension denotation "
        "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
        "T");
    schema.Output(
        0,
        "Y",
        "Output data tensor from average or max pooling across "
        "the input tensor. Dimensions will vary based "
        "on various kernel, stride, and pad sizes. Floor value of "
        "the dimension is used",
        "T");
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      if (ctx.getNumOutputs() > 1) {
        // MaxPool with two outputs case.
        // The optional indices output is INT64; set it when its type is a
        // tensor type or still unset.
        auto output_type = ctx.getOutputType(1);
        if (output_type->value_case() == TypeProto::kTensorType ||
            output_type->value_case() == TypeProto::VALUE_NOT_SET) {
          output_type->mutable_tensor_type()->set_elem_type(TensorProto::INT64);
        }
      }
      // Pooling: no dilation, kernel_shape attribute is required.
      convPoolShapeInference1(ctx, false, true, 0, 1);
    });
  };
}
// Builds the schema-filling functor for opset-10/11-era pooling ops: adds the
// ceil_mode attribute and an optionally-dilated kernel term in the doc.
//
// name / opName / additionalDescription: doc-template substitutions.
// use_dilation: substitute the dilation-aware kernel extent into the doc and
//               honor "dilations" during shape inference.
// opsetNum:     selects the wording of the "strides" attribute doc (opset 11
//               documents the default-to-1 behavior).
std::function<void(OpSchema&)> PoolOpSchemaGenerator_10(
    const char* name,
    const char* opName,
    const char* additionalDescription,
    bool use_dilation,
    int opsetNum) {
  // Capture the arguments by value; the functor is invoked later via FillUsing.
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(
        doc = R"DOC(
 {name} consumes an input tensor X and applies {opName} pooling across
 the tensor according to kernel sizes, stride sizes, and pad lengths.
 {opName} pooling consisting of computing the {opName} on all values of a
 subset of the input tensor according to the kernel size and downsampling the
 data into the output tensor Y for further processing. The output spatial shape will be following:
 ```
 output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
 ```
 or
 ```
 output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
 ```
 if ceil_mode is enabled
 ```
 * pad_shape[i] is sum of pads along axis i
 ```
 `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following:
 ```
 VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - {kernelSpatialShape} + 1) / strides_spatial_shape[i])
 SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
 ```
 And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:
 ```
 pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + {kernelSpatialShape} - input_spatial_shape[i]
 ```
 {additionalDescription}
 )DOC";
        // Substitute the caller-supplied fragments into the template.
        ReplaceAll(doc, "{name}", name);
        ReplaceAll(doc, "{opName}", opName);
        ReplaceAll(doc, "{additionalDescription}", additionalDescription);
        ReplaceAll(
            doc,
            "{kernelSpatialShape}",
            use_dilation ? "((kernel_spatial_shape[i] - 1) * dilations[i] + 1)" : "kernel_spatial_shape[i]"););
    schema.SetDoc(doc);
    schema.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS);
    schema.Attr(
        "strides",
        opsetNum == 11
            ? "Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis."
            : "Stride along each spatial axis.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc2, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr(
        "ceil_mode",
        "Whether to use ceil or floor (default) to compute the output shape.",
        AttributeProto::INT,
        static_cast<int64_t>(0));
    schema.Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the batch "
        "size. Optionally, if dimension denotation is "
        "in effect, the operation expects the input "
        "data tensor to arrive with the dimension denotation "
        "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
        "T");
    schema.Output(
        0,
        "Y",
        "Output data tensor from average or max pooling across "
        "the input tensor. Dimensions will vary based "
        "on various kernel, stride, and pad sizes. Floor value of "
        "the dimension is used",
        "T");
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    // use_dilation must be captured for the inference lambda as well.
    schema.TypeAndShapeInferenceFunction([use_dilation](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      if (ctx.getNumOutputs() > 1) {
        // MaxPool with two outputs case.
        // The optional indices output is INT64; set it when its type is a
        // tensor type or still unset.
        auto output_type = ctx.getOutputType(1);
        if (output_type->value_case() == TypeProto::kTensorType ||
            output_type->value_case() == TypeProto::VALUE_NOT_SET) {
          output_type->mutable_tensor_type()->set_elem_type(TensorProto::INT64);
        }
      }
      convPoolShapeInference1(ctx, use_dilation, true, 0, 1);
    });
  };
}
// Returns the tensor type strings accepted by the pooling operators.
// The float types are always supported; the 8-bit integer types are
// appended only when supports8bit is true.
std::vector<std::string> GetSupportedDataTypesForPoolingOps_1(bool supports8bit) {
  std::vector<std::string> types{"tensor(float16)", "tensor(float)", "tensor(double)"};
  if (supports8bit) {
    types.emplace_back("tensor(int8)");
    types.emplace_back("tensor(uint8)");
  }
  return types;
}
// Builds the schema-filling functor for the opset-11 pooling ops: uses the
// auto_pad_doc3 wording, documents ceil_mode interaction with auto_pad,
// marks X/Y as differentiable, and optionally admits 8-bit tensor types.
//
// name / opName / additionalDescription: doc-template substitutions.
// use_dilation: substitute the dilation-aware kernel extent into the doc and
//               honor "dilations" during shape inference.
// supports8bit: widen the "T" constraint to include int8/uint8.
std::function<void(OpSchema&)> PoolOpSchemaGenerator_11(
    const char* name,
    const char* opName,
    const char* additionalDescription,
    bool use_dilation,
    bool supports8bit = false) {
  // Capture the arguments by value; the functor is invoked later via FillUsing.
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(
        doc = R"DOC(
 {name} consumes an input tensor X and applies {opName} pooling across
 the tensor according to kernel sizes, stride sizes, and pad lengths.
 {opName} pooling consisting of computing the {opName} on all values of a
 subset of the input tensor according to the kernel size and downsampling the
 data into the output tensor Y for further processing. The output spatial shape will be following:
 ```
 output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
 ```
 or
 ```
 output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
 ```
 if ceil_mode is enabled
 ```
 * pad_shape[i] is sum of pads along axis i
 ```
 `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
 ```
 VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - {kernelSpatialShape} + 1) / strides_spatial_shape[i])
 SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
 ```
 or when ceil_mode is disabled:
 ```
 VALID: output_spatial_shape[i] = floor((input_spatial_shape[i] - {kernelSpatialShape} + 1) / strides_spatial_shape[i])
 SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = floor(input_spatial_shape[i] / strides_spatial_shape[i])
 ```
 And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:
 ```
 pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + {kernelSpatialShape} - input_spatial_shape[i]
 ```
 {additionalDescription}
 )DOC";
        // Substitute the caller-supplied fragments into the template.
        ReplaceAll(doc, "{name}", name);
        ReplaceAll(doc, "{opName}", opName);
        ReplaceAll(doc, "{additionalDescription}", additionalDescription);
        ReplaceAll(
            doc,
            "{kernelSpatialShape}",
            use_dilation ? "((kernel_spatial_shape[i] - 1) * dilations[i] + 1)" : "kernel_spatial_shape[i]"););
    schema.SetDoc(doc);
    schema.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS);
    schema.Attr(
        "strides",
        "Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc3, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr(
        "ceil_mode",
        "Whether to use ceil or floor (default) to compute the output shape.",
        AttributeProto::INT,
        static_cast<int64_t>(0));
    schema.Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the batch "
        "size. Optionally, if dimension denotation is "
        "in effect, the operation expects the input "
        "data tensor to arrive with the dimension denotation "
        "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
        "T",
        OpSchema::Single,
        true,
        1,
        OpSchema::Differentiable);
    schema.Output(
        0,
        "Y",
        "Output data tensor from average or max pooling across "
        "the input tensor. Dimensions will vary based "
        "on various kernel, stride, and pad sizes. Floor value of "
        "the dimension is used",
        "T",
        OpSchema::Single,
        true,
        1,
        OpSchema::Differentiable);
    schema.TypeConstraint(
        "T",
        GetSupportedDataTypesForPoolingOps_1(supports8bit),
        supports8bit ? "Constrain input and output types to float and 8 bit tensors."
                     : "Constrain input and output types to float tensors.");
    // use_dilation must be captured for the inference lambda as well.
    schema.TypeAndShapeInferenceFunction([use_dilation](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      if (ctx.getNumOutputs() > 1) {
        // MaxPool with two outputs case.
        // The optional indices output is INT64; set it when its type is a
        // tensor type or still unset.
        auto output_type = ctx.getOutputType(1);
        if (output_type->value_case() == TypeProto::kTensorType ||
            output_type->value_case() == TypeProto::VALUE_NOT_SET) {
          output_type->mutable_tensor_type()->set_elem_type(TensorProto::INT64);
        }
      }
      convPoolShapeInference1(ctx, use_dilation, true, 0, 1);
    });
  };
}
// AveragePool, opset 1: pooled window is always divided by the element count
// excluding padded pixels (no count_include_pad attribute yet).
ONNX_OPERATOR_SET_SCHEMA(
    AveragePool,
    1,
    OpSchema().FillUsing(PoolOpSchemaGenerator_9(
        "AveragePool",
        "average",
        "The output of each pooling window is divided by the number of elements exclude pad.")));

// AveragePool, opset 7: adds the count_include_pad attribute on top of the
// same generator used for opset 1.
ONNX_OPERATOR_SET_SCHEMA(
    AveragePool,
    7,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_9(
            "AveragePool",
            "average",
            "The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero)."))
        .Attr(
            "count_include_pad",
            "Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include pad.",
            AttributeProto::INT,
            static_cast<int64_t>(0)));

// AveragePool, opset 10: switches to PoolOpSchemaGenerator_10
// (use_dilation=false, opset=10).
ONNX_OPERATOR_SET_SCHEMA(
    AveragePool,
    10,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_10(
            "AveragePool",
            "average",
            "The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero).",
            false,
            10))
        .Attr(
            "count_include_pad",
            "Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include pad.",
            AttributeProto::INT,
            static_cast<int64_t>(0)));

// AveragePool, opset 11: switches to PoolOpSchemaGenerator_11
// (use_dilation=true, supports8bit=false).
ONNX_OPERATOR_SET_SCHEMA(
    AveragePool,
    11,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_11(
            "AveragePool",
            "average",
            "The output of each pooling window is divided by the number of elements (exclude pad when attribute count_include_pad is zero).",
            true,
            false))
        .Attr(
            "count_include_pad",
            "Whether include pad pixels when calculating values for the edges. Default is 0, doesn't count include pad.",
            AttributeProto::INT,
            static_cast<int64_t>(0)));
// MaxPool, opset 1: single output, no storage_order/Indices yet.
ONNX_OPERATOR_SET_SCHEMA(
    MaxPool,
    1,
    OpSchema().FillUsing(PoolOpSchemaGenerator_9(
        "MaxPool",
        "max",
        "The output of each pooling window is maximum number of elements exclude pad.")));

// MaxPool, opset 8: adds the optional second output 'Indices' (flattened
// int64 indices of the selected maxima) and the storage_order attribute.
ONNX_OPERATOR_SET_SCHEMA(
    MaxPool,
    8,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_9(
            "MaxPool",
            "max",
            "The output of each pooling window is maximum number of elements exclude pad."))
        .Attr(
            "storage_order",
            "The storage order of the tensor. 0 is row major, and 1 is column major.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Output(
            1,
            "Indices",
            "Indices tensor from max pooling across the input tensor. "
            "The dimensions of indices are the same as output tensor. "
            "The values in indices of are the indices of the selected values during pooling. "
            "The indices are computed as flatten 1-D tensor, "
            "and the indices do not consider padding. "
            "So the values in indices are in [0, N x C x D1 x ... x Dn).",
            "I",
            OpSchema::Optional)
        .TypeConstraint("I", {"tensor(int64)"}, "Constrain index tensor to int64"));

// MaxPool, opset 10: adds the dilations attribute; built on
// PoolOpSchemaGenerator_10 (use_dilation=true, opset=10).
ONNX_OPERATOR_SET_SCHEMA(
    MaxPool,
    10,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_10(
            "MaxPool",
            "max",
            "The output of each pooling window is maximum number of elements exclude pad.",
            true,
            10))
        .Attr(
            "storage_order",
            "The storage order of the tensor. 0 is row major, and 1 is column major.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Attr("dilations", "Dilation value along each spatial axis of filter.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Output(
            1,
            "Indices",
            "Indices tensor from max pooling across the input tensor. "
            "The dimensions of indices are the same as output tensor. "
            "The values in indices of are the indices of the selected values during pooling. "
            "The indices are computed as flatten 1-D tensor, "
            "and the indices do not consider padding. "
            "So the values in indices are in [0, N x C x D1 x ... x Dn).",
            "I",
            OpSchema::Optional)
        .TypeConstraint("I", {"tensor(int64)"}, "Constrain index tensor to int64"));

// MaxPool, opset 11: same shape as opset 10 with a clarified dilations doc
// (default of 1 per spatial axis is spelled out).
ONNX_OPERATOR_SET_SCHEMA(
    MaxPool,
    11,
    OpSchema()
        .FillUsing(PoolOpSchemaGenerator_10(
            "MaxPool",
            "max",
            "The output of each pooling window is maximum number of elements exclude pad.",
            true,
            11))
        .Attr(
            "storage_order",
            "The storage order of the tensor. 0 is row major, and 1 is column major.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Attr(
            "dilations",
            "Dilation value along each spatial axis of filter. If not present, the dilation defaults to 1 along each spatial axis.",
            AttributeProto::INTS,
            OPTIONAL_VALUE)
        .Output(
            1,
            "Indices",
            "Indices tensor from max pooling across the input tensor. "
            "The dimensions of indices are the same as output tensor. "
            "The values in indices of are the indices of the selected values during pooling. "
            "The indices are computed as flatten 1-D tensor, "
            "and the indices do not consider padding. "
            "So the values in indices are in [0, N x C x D1 x ... x Dn).",
            "I",
            OpSchema::Optional)
        .TypeConstraint("I", {"tensor(int64)"}, "Constrain index tensor to int64"));
// Shape inference for MaxUnpool. When the optional third input
// 'output_shape' is present, the output shape is runtime-determined and only
// its rank/element count is validated here. Otherwise each spatial dim is
// reconstructed by inverting the MaxPool size formula:
//   out[i] = strides[i] * (in[i] - 1) + kernel_shape[i] - pads_begin[i] - pads_end[i]
void maxUnpoolShapeInference1(InferenceContext& ctx) {
  // we need at least two inputs to have a shape for this inference.
  if (ctx.getNumInputs() != 2 && ctx.getNumInputs() != 3) {
    fail_type_inference("MaxUnpool op must have either two or three inputs.");
  }
  propagateElemTypeFromInputToOutput(ctx, 0, 0);
  if (!hasInputShape(ctx, 0)) {
    return; // If first input does not have shape, we cannot infer much.
  }
  auto input_shape = ctx.getInputType(0)->tensor_type().shape();
  if (input_shape.dim_size() < 2) {
    fail_shape_inference("Input tensor X must have at least 2 dimensions.");
  }
  // first dim is the batch axis and the next is the number of channels.
  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
  // 'pads' lists begin then end padding for every spatial axis; default 0.
  std::vector<int64_t> pads;
  if (getRepeatedAttribute(ctx, "pads", pads)) {
    if (pads.size() != n_input_dims * 2) {
      fail_shape_inference("Attribute pads has incorrect size.");
    }
  } else {
    pads.assign(n_input_dims * 2, 0);
  }
  // 'strides' defaults to 1 along each spatial axis.
  std::vector<int64_t> strides;
  if (getRepeatedAttribute(ctx, "strides", strides)) {
    if (strides.size() != n_input_dims) {
      fail_shape_inference("Attribute strides has incorrect size.");
    }
  } else {
    strides.assign(n_input_dims, 1);
  }
  // 'kernel_shape' is mandatory — there is no way to infer it here.
  std::vector<int64_t> kernel_shape;
  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
    if (kernel_shape.size() != n_input_dims) {
      fail_shape_inference("Attribute kernel_shape has incorrect size.");
    }
  } else {
    fail_shape_inference("Attribute kernel_shape must be specified.");
  }
  if (ctx.getNumInputs() == 3) {
    // If the third input, output_size, is specified, then use that instead
    // of inferring shape from inputs.
    if (hasInputShape(ctx, 2)) {
      auto& output_shape = getInputShape(ctx, 2);
      if (output_shape.dim_size() != 1) {
        fail_type_inference("'output_shape' must be rank 1 tensor.");
      }
      // Only the element count of the 1-D 'output_shape' tensor can be
      // checked statically; its values are known only at runtime.
      if (output_shape.dim((int)0).has_dim_value() &&
          static_cast<int>(output_shape.dim((int)0).dim_value()) != input_shape.dim_size()) {
        fail_shape_inference("'output_shape' must have same number of elements as the shape of input tensor X.");
      }
    }
    return; // 'output_shape' is specified as input. Actual shape will be
            // determined at runtime.
  }
  auto final_output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
  *final_output_shape->add_dim() = input_shape.dim(0);
  // NOTE(review): input 1's type/shape is dereferenced without a
  // hasInputShape(ctx, 1) guard — presumably the index tensor's type is
  // always populated by the time inference runs; confirm.
  *final_output_shape->add_dim() =
      ctx.getInputType(1)->tensor_type().shape().dim(1); // channels should be the second dim of second input.
  int kernel_shape_size = static_cast<int>(kernel_shape.size());
  for (int i = 0; i < kernel_shape_size; ++i) {
    auto newdim = final_output_shape->add_dim();
    // Unknown spatial input dim -> leave the output dim unknown too.
    if (!input_shape.dim(2 + i).has_dim_value()) {
      continue;
    }
    // Invert the pooling output-size formula to recover pre-pool size.
    int64_t newdim_value = strides[i] * (input_shape.dim(2 + i).dim_value() - 1);
    newdim_value += kernel_shape[i];
    newdim_value -= pads[i];
    newdim_value -= pads[i + kernel_shape_size];
    // add in the initial position
    newdim->set_dim_value(newdim_value);
  }
}
// Operator documentation for MaxUnpool (opset 9), rendered into the spec by
// the schema registration below.
static const char* MaxUnpool_ver9_doc = R"DOC(
MaxUnpool essentially computes the partial inverse of the MaxPool op.
The input information to this op is typically the output information from a MaxPool op. The first
input tensor X is the tensor that needs to be unpooled, which is typically the pooled tensor (first output)
from MaxPool. The second input tensor, I, contains the indices to the (locally maximal) elements corresponding
to the elements in the first input tensor X. Input tensor I is typically the second output of the MaxPool op.
The third (optional) input is a tensor that specifies the output size of the unpooling operation.
MaxUnpool is intended to do 'partial' inverse of the MaxPool op. 'Partial' because all the non-maximal
values from the original input to MaxPool are set to zero in the output of the MaxUnpool op. Pooling
the result of an unpooling operation should give back the original input to the unpooling op.
MaxUnpool can produce the same output size for several input sizes, which makes unpooling op ambiguous.
The third input argument, output_size, is meant to disambiguate the op and produce output tensor of
known/predictable size.
In addition to the inputs, MaxUnpool takes three attributes, namely kernel_shape, strides, and pads,
which define the exact unpooling op. The attributes typically have the same values as the corresponding
pooling op that the unpooling op is trying to invert.
)DOC";
// MaxUnpool, opset 9: partial inverse of MaxPool; shape inference delegates
// to maxUnpoolShapeInference1 above.
ONNX_OPERATOR_SET_SCHEMA(
    MaxUnpool,
    9,
    OpSchema()
        .SetDoc(MaxUnpool_ver9_doc)
        .Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS)
        .Attr("strides", "Stride along each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE)
        .Input(
            0,
            "X",
            "Input data tensor that has to be unpooled. "
            "This tensor is typically the first output of the MaxPool op."
            "Dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the "
            "width of the data. For non-image case, the "
            "dimensions are in the form of "
            "(N x C x D1 x D2 ... Dn), where N is the batch "
            "size. Optionally, if dimension denotation is "
            "in effect, the operation expects the input "
            "data tensor to arrive with the dimension denotation "
            "of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
            "T1")
        .Input(
            1,
            "I",
            "Input data tensor containing the indices corresponding to "
            "elements in the first input tensor X."
            "This tensor is typically the second output of the MaxPool op."
            "Dimensions must be the same as input tensor X. "
            "The indices are linear, i.e. computed considering the tensor as flattened 1-D tensor, "
            "assuming row-major storage. Also, the linear indices should not consider padding. "
            "So the values in indices are in the range [0, N x C x D1 x ... x Dn).",
            "T2")
        .Input(
            2,
            "output_shape",
            "The shape of the output can be explicitly set which will cause pads values to be auto generated. If 'output_shape' is specified, "
            "'pads' values are ignored.",
            "T2",
            OpSchema::Optional)
        .Output(0, "output", "Output data tensor that contains the result of the unpooling.", "T1")
        .TypeConstraint(
            "T1",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .TypeConstraint("T2", {"tensor(int64)"}, "Constrain index tensor to int64")
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { maxUnpoolShapeInference1(ctx); }));
// Shared documentation text for the explicit "pads" attribute used by the
// opset-1 pooling/conv schemas below. (Grammar fixed: "represents",
// "as follows", "xi_begin is the number".)
const char* pads_doc1 =
    "Padding for the beginning and ending along each axis, it can take any value greater "
    "than or equal to 0. The value represents the number of pixels added to the beginning "
    "and end part of the corresponding axis. `pads` format should be as follows "
    "[x1_begin, x2_begin...x1_end, x2_end,...], where xi_begin is the number of pixels "
    "added at the beginning of axis `i` and xi_end, the number of pixels added at "
    "the end of axis `i`. This attribute cannot be used simultaneously with "
    "auto_pad attribute.";
// Shared documentation text for the legacy "auto_pad" attribute. Fixed a
// missing space at a string-fragment boundary that rendered as
// "...match the input.In case of odd number..." in generated docs.
const char* auto_pad_doc1 =
    "auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID. Where "
    "default value is NOTSET, which means explicit padding is used. "
    "SAME_UPPER or SAME_LOWER mean pad the input so that the output size match the input. "
    "In case of odd number add the extra padding at the end for SAME_UPPER and at the "
    "beginning for SAME_LOWER. VALID mean no padding. DEPRECATION NOTE: auto_pad is "
    "only intended to support legacy uses, and for framework authors, one is explicitly "
    "encouraged to use explicit padding specified in the pads attribute.";
// Operator documentation for LpPool (opset 1). Fixed a duplicated "the"
// that straddled the first two lines ("across the / the tensor").
static const char* LpPool_ver1_doc = R"DOC(
LpPool consumes an input tensor X and applies Lp pooling across
the tensor according to kernel sizes, stride sizes, and pad lengths.
Lp pooling consisting of computing the Lp norm on all values of a subset
of the input tensor according to the kernel size and downsampling the
data into the output tensor Y for further processing.)DOC";
// LpPool, opset 1: note 'p' is a FLOAT attribute here (default 2.0); later
// opsets (see LpPoolOpSchemaGenerator_10/_11) switch it to INT.
ONNX_OPERATOR_SET_SCHEMA(
    LpPool,
    1,
    OpSchema()
        .SetDoc(LpPool_ver1_doc)
        .Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("strides", "Stride along each axis.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("auto_pad", auto_pad_doc1, AttributeProto::STRING, std::string("NOTSET"))
        .Attr("pads", pads_doc1, AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr(
            "p",
            "p value of the Lp norm used to pool over the input data, default is 2.0.",
            AttributeProto::FLOAT,
            2.0f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the "
            "width of the data. For non image case, the "
            "dimension are in the form of "
            "(N x C x D1 x D2 ... Dn), where N is the "
            "batch size.",
            "T")
        .Output(
            0,
            "Y",
            "Output data tensor from Lp pooling across the input "
            "tensor. Dimensions will vary based on various kernel, stride, and pad "
            "sizes.",
            "T")
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors."));
// Schema generator for LpPool opset 2: Lp-norm pooling with an integer 'p'
// attribute (default 2); the {name} placeholder in the doc template is
// substituted with the concrete op name.
std::function<void(OpSchema&)> LpPoolOpSchemaGenerator_10(const char* name) {
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(doc = R"DOC(
{name} consumes an input tensor X and applies Lp pooling across
the tensor according to kernel sizes, stride sizes, and pad lengths.
Lp pooling consisting of computing the Lp norm on all values of a subset
of the input tensor according to the kernel size and downsampling the
data into the output tensor Y for further processing.)DOC";
                        ReplaceAll(doc, "{name}", name););
    schema.SetDoc(doc);
    schema.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS);
    schema.Attr("strides", "Stride along each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc2, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    // 'p' changed from FLOAT (opset 1) to INT starting with this generator.
    schema.Attr(
        "p", "p value of the Lp norm used to pool over the input data.", AttributeProto::INT, static_cast<int64_t>(2));
    schema.Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the "
        "batch size.",
        "T");
    schema.Output(
        0,
        "Y",
        "Output data tensor from Lp pooling across the input "
        "tensor. Dimensions will vary based on various kernel, stride, and pad "
        "sizes.",
        "T");
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    // Standard conv/pool spatial shape inference (no dilation, pooling mode).
    schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      convPoolShapeInference1(ctx, false, true, 0, 1);
    });
  };
}
ONNX_OPERATOR_SET_SCHEMA(LpPool, 2, OpSchema().FillUsing(LpPoolOpSchemaGenerator_10("LpPool")));
// Operator documentation for GlobalLpPool (opset 1). Fixed a duplicated
// "the" that straddled the first two lines ("across the / the values").
static const char* GlobalLpPool_ver1_doc = R"DOC(
GlobalLpPool consumes an input tensor X and applies lp pool pooling across
the values in the same channel. This is equivalent to LpPool with kernel size
equal to the spatial dimension of input tensor.)DOC";
// Schema generator for LpPool opset 11: same contract as the opset-2
// generator, with the stride default documented and inputs/outputs marked
// Differentiable.
std::function<void(OpSchema&)> LpPoolOpSchemaGenerator_11(const char* name) {
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(doc = R"DOC(
{name} consumes an input tensor X and applies Lp pooling across
the tensor according to kernel sizes, stride sizes, and pad lengths.
Lp pooling consisting of computing the Lp norm on all values of a subset
of the input tensor according to the kernel size and downsampling the
data into the output tensor Y for further processing.)DOC";
                        ReplaceAll(doc, "{name}", name););
    schema.SetDoc(doc);
    schema.Attr("kernel_shape", "The size of the kernel along each axis.", AttributeProto::INTS);
    schema.Attr(
        "strides",
        "Stride along each spatial axis. If not present, the stride defaults to 1 along each spatial axis.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc3, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr(
        "p", "p value of the Lp norm used to pool over the input data.", AttributeProto::INT, static_cast<int64_t>(2));
    schema.Input(
        0,
        "X",
        "Input data tensor from the previous operator; "
        "dimensions for image case are (N x C x H x W), "
        "where N is the batch size, C is the number of "
        "channels, and H and W are the height and the "
        "width of the data. For non image case, the "
        "dimensions are in the form of "
        "(N x C x D1 x D2 ... Dn), where N is the "
        "batch size.",
        "T",
        OpSchema::Single,
        true,
        1,
        OpSchema::Differentiable);
    schema.Output(
        0,
        "Y",
        "Output data tensor from Lp pooling across the input "
        "tensor. Dimensions will vary based on various kernel, stride, and pad "
        "sizes.",
        "T",
        OpSchema::Single,
        true,
        1,
        OpSchema::Differentiable);
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    // Standard conv/pool spatial shape inference (no dilation, pooling mode).
    schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      convPoolShapeInference1(ctx, false, true, 0, 1);
    });
  };
}
ONNX_OPERATOR_SET_SCHEMA(LpPool, 11, OpSchema().FillUsing(LpPoolOpSchemaGenerator_11("LpPool")));
// Schema generator for Conv (opset 1): N-D convolution with optional bias,
// grouped channels, and standard conv shape inference. filter_desc is
// substituted into the doc template.
std::function<void(OpSchema&)> ConvOpSchemaGenerator_10(const char* filter_desc) {
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(doc = R"DOC(
The convolution operator consumes an input tensor and {filter_desc}, and
computes the output.)DOC";
                        ReplaceAll(doc, "{filter_desc}", filter_desc););
    schema.SetDoc(doc);
    schema.Input(
        0,
        "X",
        "Input data tensor from previous layer; "
        "has size (N x C x H x W), where N is the batch size, "
        "C is the number of channels, and H and W are the "
        "height and width. Note that this is for the 2D image. "
        "Otherwise the size is (N x C x D1 x D2 ... x Dn). "
        "Optionally, if dimension denotation is "
        "in effect, the operation expects input data tensor "
        "to arrive with the dimension denotation of [DATA_BATCH, "
        "DATA_CHANNEL, DATA_FEATURE, DATA_FEATURE ...].",
        "T");
    schema.Input(
        1,
        "W",
        "The weight tensor that will be used in the "
        "convolutions; has size (M x C/group x kH x kW), where C "
        "is the number of channels, and kH and kW are the "
        "height and width of the kernel, and M is the number "
        "of feature maps. For more than 2 dimensions, the "
        "kernel shape will be (M x C/group x k1 x k2 x ... x kn), "
        "where (k1 x k2 x ... kn) is the dimension of the kernel. "
        "Optionally, if dimension denotation is in effect, "
        "the operation expects the weight tensor to arrive "
        "with the dimension denotation of [FILTER_OUT_CHANNEL, "
        "FILTER_IN_CHANNEL, FILTER_SPATIAL, FILTER_SPATIAL ...]. "
        "X.shape[1] == (W.shape[1] * group) == C "
        "(assuming zero based indices for the shape array). "
        "Or in other words FILTER_IN_CHANNEL should be equal to DATA_CHANNEL. ",
        "T");
    schema.Input(2, "B", "Optional 1D bias to be added to the convolution, has size of M.", "T", OpSchema::Optional);
    schema.Output(
        0,
        "Y",
        "Output data tensor that contains the result of the "
        "convolution. The output dimensions are functions "
        "of the kernel size, stride size, and pad lengths.",
        "T");
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    // kernel_shape may be omitted and inferred from W's spatial dims.
    schema.Attr(
        "kernel_shape",
        "The shape of the convolution kernel. If not present, should be inferred from input W.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr(
        "dilations", "dilation value along each spatial axis of the filter.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("strides", "Stride along each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc2, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr(
        "group",
        "number of groups input channels and output channels are divided into.",
        AttributeProto::INT,
        static_cast<int64_t>(1));
    // Conv shape inference (dilation supported, non-pooling mode).
    schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
      propagateElemTypeFromInputToOutput(ctx, 0, 0);
      convPoolShapeInference1(ctx, true, false, 0, 1);
    });
  };
}
ONNX_OPERATOR_SET_SCHEMA(Conv, 1, OpSchema().FillUsing(ConvOpSchemaGenerator_10("a filter")));
// Shape inference for ConvTranspose. Output batch dim comes from X, channel
// dim from W.shape[1] * group. Spatial dims come either from the
// 'output_shape' attribute (used verbatim) or from:
//   out[i] = strides[i] * (in[i] - 1) + output_padding[i]
//            + effective_kernel[i] - pads_begin[i] - pads_end[i]
// where effective_kernel accounts for dilation. Most attribute-size
// mismatches bail out silently (return) rather than failing.
void convTransposeShapeInference1(InferenceContext& ctx) {
  propagateElemTypeFromInputToOutput(ctx, 0, 0);
  // we need at least two inputs to have a shape for this inference.
  if (!hasNInputShapes(ctx, 2)) {
    return;
  }
  int64_t group = getAttribute(ctx, "group", 1);
  auto input_shape = ctx.getInputType(0)->tensor_type().shape();
  if (input_shape.dim_size() < 2) {
    return; // Input tensor should have at least two dimensions.
  }
  // first dim is the batch axis and the next is the number of channels.
  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
  // 'dilations' defaults to 1 per spatial axis.
  std::vector<int64_t> dilations;
  if (getRepeatedAttribute(ctx, "dilations", dilations)) {
    if (dilations.size() != n_input_dims) {
      return;
    }
  } else {
    dilations.assign(n_input_dims, 1);
  }
  // 'strides' defaults to 1 per spatial axis.
  std::vector<int64_t> strides;
  if (getRepeatedAttribute(ctx, "strides", strides)) {
    if (strides.size() != n_input_dims) {
      return;
    }
  } else {
    strides.assign(n_input_dims, 1);
  }
  // kernel_shape falls back to W's spatial dims; bail if any is unknown.
  std::vector<int64_t> kernel_shape;
  if (getRepeatedAttribute(ctx, "kernel_shape", kernel_shape)) {
    if (kernel_shape.size() != n_input_dims) {
      return;
    }
  } else {
    auto second_input_shape = ctx.getInputType(1)->tensor_type().shape();
    for (int i = 2; i < second_input_shape.dim_size(); ++i) {
      if (!second_input_shape.dim(i).has_dim_value()) {
        return;
      }
      kernel_shape.push_back(second_input_shape.dim(i).dim_value());
    }
  }
  std::vector<int64_t> effective_kernel_shape = kernel_shape;
  for (int i = 0; i < static_cast<int>(kernel_shape.size()); i++) {
    // accounting for dilation, how big is the kernel in this dimension
    effective_kernel_shape[i] = (effective_kernel_shape[i] - 1) * dilations[i] + 1;
  }
  // pads: explicit attribute, else derived from auto_pad (SAME_UPPER puts
  // the smaller half first; SAME_LOWER the larger half first).
  std::vector<int64_t> pads;
  if (getRepeatedAttribute(ctx, "pads", pads)) {
    if (pads.size() != n_input_dims * 2) {
      fail_shape_inference("Attribute pads has incorrect size");
    }
  } else {
    pads.assign(n_input_dims * 2, 0);
    const auto* auto_pad_attr = ctx.getAttribute("auto_pad");
    if ((nullptr != auto_pad_attr) && (auto_pad_attr->s() != "VALID")) {
      int input_dims_size = static_cast<int>(n_input_dims);
      for (int i = 0; i < input_dims_size; ++i) {
        int64_t total_pad = effective_kernel_shape[i] - strides[i];
        if (total_pad < 0)
          total_pad = 0;
        int64_t half_pad_small = total_pad >> 1;
        int64_t half_pad_big = total_pad - half_pad_small;
        if (auto_pad_attr->s() == "SAME_UPPER") {
          pads[i] = half_pad_small;
          pads[i + input_dims_size] = half_pad_big;
        } else if (auto_pad_attr->s() == "SAME_LOWER") {
          pads[i] = half_pad_big;
          pads[i + input_dims_size] = half_pad_small;
        }
      }
    }
  }
  std::vector<int64_t> output_shape;
  bool output_shape_presented = true;
  if (getRepeatedAttribute(ctx, "output_shape", output_shape)) {
    if (output_shape.size() != n_input_dims) {
      return;
    }
  } else {
    output_shape_presented = false;
  }
  std::vector<int64_t> output_padding;
  if (getRepeatedAttribute(ctx, "output_padding", output_padding)) {
    if (output_padding.size() != n_input_dims) { // Added only to one side.
      return;
    }
  } else {
    output_padding.assign(n_input_dims, 0);
  }
  auto final_output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
  *final_output_shape->add_dim() = input_shape.dim(0);
  *final_output_shape->add_dim() =
      ctx.getInputType(1)->tensor_type().shape().dim(1) * group; // channels should be the second dim of second input
                                                                 // multiply group.
  int size_of_output;
  if (output_shape_presented) {
    // Explicit output_shape wins; values are copied through after a sanity
    // check against the (known) input spatial sizes.
    size_of_output = static_cast<int>(output_shape.size());
    for (int i = 0; i < size_of_output; ++i) {
      if (input_shape.dim(i + 2).has_dim_value()) {
        if (output_shape[i] < input_shape.dim(i + 2).dim_value()) {
          // TODO: throw exception?
          return; // output shape value cannot be smaller than the input shape
                  // value
        }
      }
      final_output_shape->add_dim()->set_dim_value(output_shape[i]);
    }
    return;
  } else {
    // Derive each spatial dim from the transposed-convolution formula;
    // unknown input dims produce unknown output dims.
    size_of_output = input_shape.dim_size() - 2;
    for (int i = 0; i < size_of_output; ++i) {
      if (input_shape.dim(i + 2).has_dim_value()) {
        int64_t output_shape_dim = strides[i] * (input_shape.dim(i + 2).dim_value() - 1) + output_padding[i] +
            effective_kernel_shape[i] - pads[i] - pads[i + n_input_dims];
        final_output_shape->add_dim()->set_dim_value(output_shape_dim);
      } else {
        final_output_shape->add_dim();
      }
    }
    return;
  }
}
// Schema generator for ConvTranspose (opset 1); shape inference delegates
// to convTransposeShapeInference1 above.
std::function<void(OpSchema&)> ConvTransposeOpSchemaGenerator_10(const char* filter_desc) {
  return [=](OpSchema& schema) {
    std::string doc;
    POPULATE_OP_DOC_STR(doc = R"DOC(
The convolution transpose operator consumes an input tensor and {filter_desc},
and computes the output.
If the pads parameter is provided the shape of the output is calculated via the following equation:
output_shape[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - pads[start_i] - pads[end_i]
output_shape can also be explicitly specified in which case pads values are auto generated using these equations:
total_padding[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i]
If (auto_pads != SAME_UPPER): pads[start_i] = total_padding[i]/2; pads[end_i] = total_padding[i] - (total_padding[i]/2)
Else: pads[start_i] = total_padding[i] - (total_padding[i]/2); pads[end_i] = (total_padding[i]/2).
)DOC";
                        ReplaceAll(doc, "{filter_desc}", filter_desc););
    schema.SetDoc(doc);
    schema.Input(
        0,
        "X",
        "Input data tensor from previous layer; has size (N x C x H x W)"
        ", where N is the batch size, C is the number of channels, and"
        " H and W are the height and width. Note that this is for the 2D image. "
        "Otherwise the size is (N x C x D1 x D2 ... x Dn)",
        "T");
    schema.Input(
        1,
        "W",
        "The weight tensor that will be used in the "
        "convolutions; has size (C x M/group x kH x kW), where C "
        "is the number of channels, and kH and kW are the "
        "height and width of the kernel, and M is the number "
        "of feature maps. For more than 2 dimensions, the "
        "weight shape will be (C x M/group x k1 x k2 x ... x kn), "
        "where (k1 x k2 x ... x kn) is the dimension of the kernel. "
        "The number of channels in the output should be equal to W.shape[1] * group "
        "(assuming zero based indices of the shape array)",
        "T");
    schema.Input(2, "B", "Optional 1D bias to be added to the convolution, has size of M.", "T", OpSchema::Optional);
    schema.Output(
        0,
        "Y",
        "Output data tensor that contains the result of the convolution. The "
        "output dimensions are functions of the kernel size, stride size, "
        "pad lengths and group count. "
        "The number of channels in the output should be equal to W.shape[1] * group "
        "(assuming zero based indices of the shape array)",
        "T");
    schema.TypeConstraint(
        "T",
        {"tensor(float16)", "tensor(float)", "tensor(double)"},
        "Constrain input and output types to float tensors.");
    // kernel_shape may be omitted and inferred from W's spatial dims.
    schema.Attr(
        "kernel_shape",
        "The shape of the convolution kernel. If not present, should be inferred from input W.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    // Explicit output_shape takes precedence over 'pads' (see op doc).
    schema.Attr(
        "output_shape",
        "The shape of the output can be explicitly set which will cause pads values to be auto generated. If output_shape is specified "
        "pads values are ignored. See doc for details for equations to generate pads",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr(
        "output_padding",
        "The zero-padding added to one side of the output."
        " This is also called adjs/adjustment in some frameworks.",
        AttributeProto::INTS,
        OPTIONAL_VALUE);
    schema.Attr(
        "dilations", "dilation value along each spatial axis of the filter.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("strides", "Stride along each spatial axis.", AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr("auto_pad", auto_pad_doc2, AttributeProto::STRING, std::string("NOTSET"));
    schema.Attr("pads", pads_doc2, AttributeProto::INTS, OPTIONAL_VALUE);
    schema.Attr(
        "group",
        "number of groups input channels and output channels are divided into.",
        AttributeProto::INT,
        static_cast<int64_t>(1));
    schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { convTransposeShapeInference1(ctx); });
  };
}
ONNX_OPERATOR_SET_SCHEMA(ConvTranspose, 1, OpSchema().FillUsing(ConvTransposeOpSchemaGenerator_10("a filter")));
// GlobalLpPool, opset 1: pools each channel down to 1x...x1. Note 'p' is a
// FLOAT attribute (default 2.0) in this opset, matching LpPool opset 1.
ONNX_OPERATOR_SET_SCHEMA(
    GlobalLpPool,
    1,
    OpSchema()
        .SetDoc(GlobalLpPool_ver1_doc)
        .Attr(
            "p",
            "p value of the Lp norm used to pool over the input data, default is 2.0.",
            AttributeProto::FLOAT,
            2.0f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the width "
            "of the data. For non image case, the dimension are "
            "in the form of (N x C x D1 x D2 ... Dn), "
            "where N is the batch size.",
            "T")
        .Output(
            0,
            "Y",
            "Output data tensor from pooling across the input "
            "tensor. Dimensions will be N x C x 1 x 1",
            "T")
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors."));
// Doc text for the opset-1 BatchNormalization schema (frozen spec text; kept
// verbatim since it is emitted into generated operator documentation).
static const char* BatchNormalization_ver1_doc = R"DOC(
Carries out batch normalization as described in the paper
https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,
there are multiple cases for the number of outputs, which we list below:
Output case #1: Y, mean, var, saved_mean, saved_var (training mode)
Output case #2: Y (test mode)
)DOC";
// Opset-1 definition of BatchNormalization. Distinctive legacy traits:
// - the "consumed_inputs" attribute is REQUIRED here (no OPTIONAL_VALUE);
//   it simulates the removed AllowConsumed API,
// - "spatial"/"is_test" attributes exist (removed in later opsets),
// - no type/shape inference function is attached.
ONNX_OPERATOR_SET_SCHEMA(
    BatchNormalization,
    1,
    OpSchema()
        // Either 1 output (test mode) or all 5 (training mode).
        .NumOutputs({1, 5})
        .SetDoc(BatchNormalization_ver1_doc)
        .Attr(
            "spatial",
            "If true, compute the mean and variance across all spatial elements "
            "If false, compute the mean and variance across per feature."
            "Default is 1.",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .Attr(
            "is_test",
            "If set to nonzero, run spatial batch normalization in test mode, default is 0.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Attr(
            "epsilon",
            "The epsilon value to use to avoid division by zero, default is 1e-5f.",
            AttributeProto::FLOAT,
            1e-5f)
        .Attr(
            "momentum",
            "Factor used in computing the running mean and variance."
            "e.g., running_mean = running_mean * momentum + mean * (1 - momentum), default is 0.9f.",
            AttributeProto::FLOAT,
            0.9f)
        // This attribute was added via AllowConsumed API in OpSchema.
        // After removing the API, we're now using the Attr API to simulate the
        // old definition. (Required in this opset version.)
        .Attr("consumed_inputs", "legacy optimization attribute.", AttributeProto::INTS)
        .Input(0, "X", "The input 4-dimensional tensor of shape NCHW.", "T")
        .Input(
            1,
            "scale",
            "The scale as a 1-dimensional tensor of size C to be applied to the "
            "output.",
            "T")
        .Input(
            2,
            "B",
            "The bias as a 1-dimensional tensor of size C to be applied to the "
            "output.",
            "T")
        .Input(
            3,
            "mean",
            "The running mean (training) or the estimated mean (testing) "
            "as a 1-dimensional tensor of size C.",
            "T")
        .Input(
            4,
            "var",
            "The running variance (training) or the estimated "
            "variance (testing) as a 1-dimensional tensor of size C.",
            "T")
        .Output(0, "Y", "The output 4-dimensional tensor of the same shape as X.", "T")
        // Outputs 1-4 are training-mode only; all share element type T.
        .Output(
            1,
            "mean",
            "The running mean after the BatchNormalization operator. Must be in-place "
            "with the input mean. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            2,
            "var",
            "The running variance after the BatchNormalization operator. Must be "
            "in-place with the input var. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            3,
            "saved_mean",
            "Saved mean used during training to speed up gradient "
            "computation. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            4,
            "saved_var",
            "Saved variance used during training to speed up "
            "gradient computation. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors."))
// Doc text for the opset-9 BatchNormalization schema.
// NOTE(review): "depreciated" (sic) is a typo for "deprecated" in the
// released spec text; left unchanged because this frozen doc string is
// reproduced verbatim in generated documentation.
static const char* BatchNormalization_ver9_doc = R"DOC(
Carries out batch normalization as described in the paper
https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,
there are multiple cases for the number of outputs, which we list below:
Output case #1: Y, mean, var, saved_mean, saved_var (training mode)
Output case #2: Y (test mode)
For previous (depreciated) non-spatial cases, implementors are suggested
to flatten the input shape to (N x C*D1*D2 ..*Dn) before a BatchNormalization Op.
)DOC";
// Opset-9 definition of BatchNormalization: "spatial"/"is_test" attributes
// are gone, inputs/outputs now carry differentiability tags, and basic
// type/shape inference (first input -> Y) is attached.
ONNX_OPERATOR_SET_SCHEMA(
    BatchNormalization,
    9,
    OpSchema()
        .NumOutputs({1, 5})
        .SetDoc(BatchNormalization_ver9_doc + GenerateOptionalArgumentsDoc())
        .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f)
        .Attr(
            "momentum",
            "Factor used in computing the running mean and variance."
            "e.g., running_mean = running_mean * momentum + mean * (1 - momentum).",
            AttributeProto::FLOAT,
            0.9f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions are in the form of (N x C x D1 x D2 ... Dn), "
            "where N is the batch size, C is the number of channels. "
            "Statistics are computed for every channel of C over N and D1 to Dn dimensions. "
            "For image data, input dimensions become (N x C x H x W). "
            "The op also accepts single dimension input of size N in which case C is assumed to be 1",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(1, "scale", "Scale tensor of shape (C).", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
        .Input(2, "B", "Bias tensor of shape (C).", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
        .Input(
            3,
            "mean",
            "running (training) or estimated (testing) mean tensor of shape (C).",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(
            4,
            "var",
            "running (training) or estimated (testing) variance tensor of shape (C).",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Output(
            0,
            "Y",
            "The output tensor of the same shape as X",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        // Training-only outputs are marked non-differentiable.
        .Output(
            1,
            "mean",
            "The running mean after the BatchNormalization operator.",
            "T",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .Output(
            2,
            "var",
            "The running variance after the BatchNormalization operator.",
            "T",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .Output(
            3,
            "saved_mean",
            "Saved mean used during training to speed up gradient "
            "computation.",
            "T",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .Output(
            4,
            "saved_var",
            "Saved variance used during training to speed up "
            "gradient computation.",
            "T",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        // Inference only propagates X's type/shape to Y; the optional
        // training-mode outputs are left uninferred.
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateShapeAndTypeFromFirstInput(ctx);
          // TODO in training mode, it may be possible to infer some of
          // the other outputs as well.
        }))
// Doc text for the opset-14 BatchNormalization schema (frozen; "depreciated"
// (sic) typo is part of the released text and is kept verbatim).
static const char* BatchNormalization_ver14_doc = R"DOC(
Carries out batch normalization as described in the paper
https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,
There are five required inputs 'X', 'scale', 'B', 'input_mean' and
'input_var'.
Note that 'input_mean' and 'input_var' are expected to be the estimated
statistics in inference mode (training_mode=False, default),
and the running statistics in training mode (training_mode=True).
There are multiple cases for the number of outputs, which we list below:
Output case #1: Y, running_mean, running_var (training_mode=True)
Output case #2: Y (training_mode=False)
When training_mode=False, extra outputs are invalid.
The outputs are updated as follows when training_mode=True:
```
running_mean = input_mean * momentum + current_mean * (1 - momentum)
running_var = input_var * momentum + current_var * (1 - momentum)
Y = (X - current_mean) / sqrt(current_var + epsilon) * scale + B
where:
current_mean = ReduceMean(X, axis=all_except_channel_index)
current_var = ReduceVar(X, axis=all_except_channel_index)
Notice that ReduceVar refers to the population variance, and it equals to
sum(sqrd(x_i - x_avg)) / N
where N is the population size (this formula does not use sample size N - 1).
```
When training_mode=False:
```
Y = (X - input_mean) / sqrt(input_var + epsilon) * scale + B
```
For previous (depreciated) non-spatial cases, implementors are suggested
to flatten the input shape to (N x C * D1 * D2 * ... * Dn) before a BatchNormalization Op.
)DOC";
// Opset-14 definition: switches to runtime "training_mode" attribute, trims
// the outputs to (Y, running_mean, running_var), and introduces a separate
// type constraint "U" for the statistics tensors.
ONNX_OPERATOR_SET_SCHEMA(
    BatchNormalization,
    14,
    OpSchema()
        // Exactly 1 output (inference) or 3 outputs (training).
        .NumOutputs({1, 3})
        .SetDoc(BatchNormalization_ver14_doc + GenerateOptionalArgumentsDoc())
        .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f)
        .Attr(
            "momentum",
            "Factor used in computing the running mean and variance."
            "e.g., running_mean = running_mean * momentum + mean * (1 - momentum).",
            AttributeProto::FLOAT,
            0.9f)
        // NOTE(review): this frozen description mentions "outputs 1, 2, 3,
        // and 4", but this opset version only defines optional outputs 1 and
        // 2 (running_mean, running_var). The text is left as released.
        .Attr(
            "training_mode",
            "If set to true, it indicates BatchNormalization is being used for training, and outputs 1, "
            "2, 3, and 4 would be populated.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions are in the form of (N x C x D1 x D2 ... Dn), "
            "where N is the batch size, C is the number of channels. "
            "Statistics are computed for every channel of C over N and D1 to Dn dimensions. "
            "For image data, input dimensions become (N x C x H x W). "
            "The op also accepts single dimension input of size N in which case C is assumed to be 1",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(1, "scale", "Scale tensor of shape (C).", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
        .Input(2, "B", "Bias tensor of shape (C).", "T", OpSchema::Single, true, 1, OpSchema::Differentiable)
        // Statistics inputs use constraint "U" (may differ from T).
        .Input(
            3,
            "input_mean",
            "running (training) or estimated (testing) mean tensor of shape (C).",
            "U",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(
            4,
            "input_var",
            "running (training) or estimated (testing) variance tensor of shape (C).",
            "U",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Output(
            0,
            "Y",
            "The output tensor of the same shape as X",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Output(
            1,
            "running_mean",
            "The running mean after the BatchNormalization operator.",
            "U",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .Output(
            2,
            "running_var",
            "The running variance after the BatchNormalization operator. This op uses the population size (N) for "
            "calculating variance, and not the sample size N-1.",
            "U",
            OpSchema::Optional,
            true,
            1,
            OpSchema::NonDifferentiable)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
            "Constrain input and output types to float tensors.")
        .TypeConstraint(
            "U",
            {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
            "Constrain mean and variance types to float tensors. It allows all float type for U.")
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          // Y gets X's element type and shape.
          propagateShapeAndTypeFromFirstInput(ctx);
          propagateShapeFromInputToOutput(ctx, 0, 0);
          // Inputs 1 to 4 must be of rank 1.
          checkInputRank(ctx, 1, 1);
          checkInputRank(ctx, 2, 1);
          checkInputRank(ctx, 3, 1);
          checkInputRank(ctx, 4, 1);
          // Unify the channel dimension across X (dim 1 when rank > 1,
          // otherwise C is implicitly 1) and all rank-1 parameter inputs.
          Dim num_channels;
          if (hasInputShape(ctx, 0)) {
            if (getInputShape(ctx, 0).dim_size() > 1)
              unifyInputDim(ctx, 0, 1, num_channels);
            else
              unifyDim(num_channels, 1);
          }
          unifyInputDim(ctx, 1, 0, num_channels);
          unifyInputDim(ctx, 2, 0, num_channels);
          unifyInputDim(ctx, 3, 0, num_channels);
          unifyInputDim(ctx, 4, 0, num_channels);
          // Enforce the output-count contract implied by training_mode.
          if (ctx.getAttribute("training_mode") && static_cast<int>(ctx.getAttribute("training_mode")->i()) != 0) {
            if (ctx.getNumOutputs() != 3)
              fail_shape_inference("This number of op outputs should be 3 when Training_mode = True, but it is not.");
          } else {
            if (ctx.getNumOutputs() != 1)
              fail_shape_inference("This number of op outputs should be 1 when Training_mode = False, but it is not.");
          }
          // running_mean / running_var are 1-D of length C, with element
          // types taken from input_mean / input_var respectively.
          if (ctx.getNumOutputs() > 1) {
            TensorShapeProto outputs_shape;
            *outputs_shape.add_dim() = num_channels; // channel
            propagateElemTypeFromInputToOutput(ctx, 3, 1);
            updateOutputShape(ctx, 1, outputs_shape);
            if (ctx.getNumOutputs() > 2) {
              propagateElemTypeFromInputToOutput(ctx, 4, 2);
              updateOutputShape(ctx, 2, outputs_shape);
            }
          }
        }))
// Doc text for the opset-1 InstanceNormalization schema (frozen; verbatim).
static const char* InstanceNormalization_ver1_doc = R"DOC(
Carries out instance normalization as described in the paper
https://arxiv.org/abs/1607.08022.
y = scale * (x - mean) / sqrt(variance + epsilon) + B,
where mean and variance are computed per instance per channel.
)DOC";
// Opset-1 definition of InstanceNormalization. Carries the legacy optional
// "consumed_inputs" attribute; no shape inference function is attached.
ONNX_OPERATOR_SET_SCHEMA(
    InstanceNormalization,
    1,
    OpSchema()
        .SetDoc(InstanceNormalization_ver1_doc)
        // This attribute was added via AllowConsumed API in OpSchema.
        // After removing the API, we're now using the Attr API to simulate the
        // old definition.
        .Attr("consumed_inputs", "legacy optimization attribute.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr(
            "epsilon",
            "The epsilon value to use to avoid division by zero, default is 1e-5f.",
            AttributeProto::FLOAT,
            1e-5f)
        .Input(0, "input", "The input 4-dimensional tensor of shape NCHW.", "T")
        .Input(1, "scale", "The input 1-dimensional scale tensor of size C.", "T")
        .Input(2, "B", "The input 1-dimensional bias tensor of size C.", "T")
        .Output(0, "output", "The output 4-dimensional tensor of the same shape as input.", "T")
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors."))
// Shared doc text for the opset-1 and opset-6 Dropout schemas (frozen).
static const char* Dropout_old_doc = R"DOC(
Dropout takes one input data (Tensor<float>) and produces two Tensor outputs,
output (Tensor<float>) and mask (Tensor<bool>). Depending on whether it is in
test mode or not, the output Y will either be a random dropout, or a simple
copy of the input. Note that our implementation of Dropout does scaling in
the training phase, so during testing nothing needs to be done.
)DOC";
// Opset-1 definition of Dropout. Legacy traits: "is_test" attribute selects
// inference behavior, "consumed_inputs" is carried over from the removed
// AllowConsumed API, and the optional mask output is typed "T" (a float
// type) rather than bool — that only changed in opset 10.
ONNX_OPERATOR_SET_SCHEMA(
    Dropout,
    1,
    OpSchema()
        .SetDoc(Dropout_old_doc)
        .Attr("ratio", "(float, default 0.5) the ratio of random dropout", AttributeProto::FLOAT, 0.5f)
        // This attribute was added via AllowConsumed API in OpSchema.
        // After removing the API, we're now using the Attr API to simulate the
        // old definition.
        .Attr("consumed_inputs", "legacy optimization attribute.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr(
            "is_test",
            "(int, default 0) if nonzero, run dropout in test mode where "
            "the output is simply Y = X.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Input(0, "data", "The input data as Tensor.", "T")
        .Output(0, "output", "The output.", "T")
        .Output(
            1,
            "mask",
            "The output mask. If is_test is nonzero, this output is not filled.",
            "T",
            OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors."))
// Opset-6 definition of Dropout: drops the legacy "consumed_inputs"
// attribute and attaches basic type/shape inference (X -> output); the mask
// output is still typed "T" rather than bool.
ONNX_OPERATOR_SET_SCHEMA(
    Dropout,
    6,
    OpSchema()
        .SetDoc(Dropout_old_doc)
        .Attr("ratio", "(float, default 0.5) the ratio of random dropout", AttributeProto::FLOAT, 0.5f)
        .Attr(
            "is_test",
            "(int, default 0) if nonzero, run dropout in test mode where "
            "the output is simply Y = X.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Input(0, "data", "The input data as Tensor.", "T")
        .Output(0, "output", "The output.", "T")
        .Output(
            1,
            "mask",
            "The output mask. If is_test is nonzero, this output is not filled.",
            "T",
            OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .TypeAndShapeInferenceFunction(propagateShapeAndTypeFromFirstInput))
// Doc text for the opset-7 Dropout schema (frozen; verbatim).
static const char* Dropout_ver7_doc = R"DOC(
Dropout takes one input data (Tensor<float>) and produces two Tensor outputs,
output (Tensor<float>) and mask (Tensor<bool>). Depending on whether it is in
test mode or not, the output Y will either be a random dropout, or a simple
copy of the input. Note that our implementation of Dropout does scaling in
the training phase, so during testing nothing needs to be done.
)DOC";
// Opset-7 definition of Dropout: the "is_test" attribute is removed
// (training/inference distinction moves out of the op); mask is still "T".
ONNX_OPERATOR_SET_SCHEMA(
    Dropout,
    7,
    OpSchema()
        .SetDoc(GET_OP_DOC_STR(std::string(Dropout_ver7_doc) + GenerateOptionalArgumentsDoc()))
        .Attr("ratio", "The ratio of random dropout", AttributeProto::FLOAT, 0.5f)
        .Input(0, "data", "The input data as Tensor.", "T")
        .Output(0, "output", "The output.", "T")
        .Output(1, "mask", "The output mask.", "T", OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .TypeAndShapeInferenceFunction(propagateShapeAndTypeFromFirstInput))
// Doc text for the opset-10 Dropout schema (frozen; verbatim).
static const char* Dropout_ver10_doc = R"DOC(
Dropout takes one input floating tensor and produces two tensor outputs,
output (floating tensor) and mask (`Tensor<bool>`). Depending on whether it is
in test mode or not, the output Y will either be a random dropout, or a simple
copy of the input. Note that our implementation of Dropout does scaling in
the training phase, so during testing nothing needs to be done.
)DOC";
// Opset-10 definition of Dropout: the mask output now uses its own
// constraint "T1" restricted to bool, and inference sets the mask's element
// type to BOOL explicitly.
ONNX_OPERATOR_SET_SCHEMA(
    Dropout,
    10,
    OpSchema()
        .SetDoc(GET_OP_DOC_STR(std::string(Dropout_ver10_doc) + GenerateOptionalArgumentsDoc()))
        .Attr("ratio", "The ratio of random dropout", AttributeProto::FLOAT, 0.5f)
        .Input(0, "data", "The input data as Tensor.", "T")
        .Output(0, "output", "The output.", "T")
        .Output(1, "mask", "The output mask.", "T1", OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .TypeConstraint("T1", {"tensor(bool)"}, "Constrain output mask types to boolean tensors.")
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateShapeAndTypeFromFirstInput(ctx);
          // Mask (when requested) is BOOL with the same shape as the input.
          if (ctx.getNumOutputs() == 2) {
            updateOutputElemType(ctx, 1, TensorProto::BOOL);
            if (hasNInputShapes(ctx, 1)) {
              propagateShapeFromInputToOutput(ctx, 0, 1);
            }
          }
        }))
// Doc text for the opset-6 BatchNormalization schema (frozen; verbatim).
static const char* BatchNorm_ver6_doc = R"DOC(
Carries out batch normalization as described in the paper
https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,
there are multiple cases for the number of outputs, which we list below:
Output case #1: Y, mean, var, saved_mean, saved_var (training mode)
Output case #2: Y (test mode)
)DOC";
// Opset-6 definition of BatchNormalization: same shape contract as opset 1
// but the legacy "consumed_inputs" attribute is removed and basic
// type/shape inference (X -> Y) is attached.
ONNX_OPERATOR_SET_SCHEMA(
    BatchNormalization,
    6,
    OpSchema()
        // Either 1 output (test mode) or all 5 (training mode).
        .NumOutputs({1, 5})
        .SetDoc(BatchNorm_ver6_doc)
        .Attr(
            "spatial",
            "If true, compute the mean and variance across all spatial elements "
            "If false, compute the mean and variance across per feature."
            "Default is 1.",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .Attr(
            "is_test",
            "If set to nonzero, run spatial batch normalization in test mode, default is 0.",
            AttributeProto::INT,
            static_cast<int64_t>(0))
        .Attr(
            "epsilon",
            "The epsilon value to use to avoid division by zero, default is 1e-5f.",
            AttributeProto::FLOAT,
            1e-5f)
        .Attr(
            "momentum",
            "Factor used in computing the running mean and variance."
            "e.g., running_mean = running_mean * momentum + mean * (1 - momentum), default is 0.9f.",
            AttributeProto::FLOAT,
            0.9f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the "
            "width of the data. For non image case, the "
            "dimensions are in the form of "
            "(N x C x D1 x D2 ... Dn), where N is the batch "
            "size.",
            "T")
        .Input(
            1,
            "scale",
            "The scale as a 1-dimensional tensor of size C to be applied to the "
            "output.",
            "T")
        .Input(
            2,
            "B",
            "The bias as a 1-dimensional tensor of size C to be applied to the "
            "output.",
            "T")
        .Input(
            3,
            "mean",
            "The running mean (training) or the estimated mean (testing) "
            "as a 1-dimensional tensor of size C.",
            "T")
        .Input(
            4,
            "var",
            "The running variance (training) or the estimated "
            "variance (testing) as a 1-dimensional tensor of size C.",
            "T")
        .Output(0, "Y", "The output tensor of the same shape as X.", "T")
        // Outputs 1-4 are training-mode only.
        .Output(
            1,
            "mean",
            "The running mean after the BatchNormalization operator. Must be in-place "
            "with the input mean. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            2,
            "var",
            "The running variance after the BatchNormalization operator. Must be "
            "in-place with the input var. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            3,
            "saved_mean",
            "Saved mean used during training to speed up gradient "
            "computation. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .Output(
            4,
            "saved_var",
            "Saved variance used during training to speed up "
            "gradient computation. Should not be used for testing.",
            "T",
            OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        // Inference only covers Y; training outputs are left uninferred.
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateShapeAndTypeFromFirstInput(ctx);
          // TODO in training mode, it may be possible to infer some of
          // the other outputs as well.
        }))
// Doc text for the opset-1 Flatten schema (frozen; verbatim).
static const char* Flatten_ver1_doc = R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn).
)DOC";
// Opset-1 definition of Flatten: restricted to float tensor types, and the
// axis attribute must lie in [0, R] — negative axes are rejected (negative
// axis support arrived in a later opset).
ONNX_OPERATOR_SET_SCHEMA(
    Flatten,
    1,
    OpSchema()
        .SetDoc(Flatten_ver1_doc)
        .Input(0, "input", "A tensor of rank >= axis.", "T")
        .Output(
            0,
            "output",
            "A 2D tensor with the contents of the input tensor, "
            "with input dimensions up to axis flattened to the outer dimension "
            "of the output and remaining input dimensions flattened into the inner "
            "dimension of the output.",
            "T")
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        .Attr(
            "axis",
            "Indicate up to which input dimensions "
            "(exclusive) should be flattened to the outer dimension of the output. "
            "The value for axis must be in the range [0, R], where R is the rank of the input tensor. "
            "When axis = 0, the shape of the output tensor is (1, (d_0 X d_1 ... d_n), "
            "where the shape of the input tensor is (d_0, d_1, ... d_n). ",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          if (!hasInputShape(ctx, 0))
            return;
          auto& input_shape = getInputShape(ctx, 0);
          int rank = static_cast<int>(input_shape.dim_size());
          int axis = static_cast<int>(getAttribute(ctx, "axis", 1));
          // axis may equal rank (everything flattens into the outer dim).
          if (axis > rank || axis < 0) {
            fail_shape_inference("Invalid value(", axis, ") for attribute 'axis'");
          }
          // TODO: is the operation defined for input-rank < 2?
          // Output is 2-D: (prod of dims [0, axis), prod of dims [axis, rank)).
          updateOutputShape(ctx, 0, {multiplyDims(input_shape, 0, axis), multiplyDims(input_shape, axis, rank)});
        }))
// Doc text for the opset-9 Flatten schema (frozen; verbatim).
static const char* Flatten_ver9_doc = R"DOC(
Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn).
)DOC";
// Opset-9 definition of Flatten: identical to opset 1 except the type
// constraint widens from float tensors to all tensor types. Negative axes
// are still rejected in this version.
ONNX_OPERATOR_SET_SCHEMA(
    Flatten,
    9,
    OpSchema()
        .SetDoc(Flatten_ver9_doc)
        .Input(0, "input", "A tensor of rank >= axis.", "T")
        .Output(
            0,
            "output",
            "A 2D tensor with the contents of the input tensor, "
            "with input dimensions up to axis flattened to the outer dimension "
            "of the output and remaining input dimensions flattened into the inner "
            "dimension of the output.",
            "T")
        .TypeConstraint("T", OpSchema::all_tensor_types(), "Constrain input and output to all tensor types.")
        .Attr(
            "axis",
            "Indicate up to which input dimensions "
            "(exclusive) should be flattened to the outer dimension of the output. "
            "The value for axis must be in the range [0, R], where R is the rank of the input tensor. "
            "When axis = 0, the shape of the output tensor is (1, (d_0 X d_1 ... d_n), "
            "where the shape of the input tensor is (d_0, d_1, ... d_n). ",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          if (!hasInputShape(ctx, 0))
            return;
          auto& input_shape = getInputShape(ctx, 0);
          int rank = static_cast<int>(input_shape.dim_size());
          int axis = static_cast<int>(getAttribute(ctx, "axis", 1));
          // axis may equal rank (everything flattens into the outer dim).
          if (axis > rank || axis < 0) {
            fail_shape_inference("Invalid value(", axis, ") for attribute 'axis'");
          }
          // TODO: is the operation defined for input-rank < 2?
          // Output is 2-D: (prod of dims [0, axis), prod of dims [axis, rank)).
          updateOutputShape(ctx, 0, {multiplyDims(input_shape, 0, axis), multiplyDims(input_shape, axis, rank)});
        }))
// Doc text for the opset-7 BatchNormalization schema (frozen; verbatim).
static const char* BatchNormalization_ver7_doc = R"DOC(
Carries out batch normalization as described in the paper
https://arxiv.org/abs/1502.03167. Depending on the mode it is being run,
there are multiple cases for the number of outputs, which we list below:
Output case #1: Y, mean, var, saved_mean, saved_var (training mode)
Output case #2: Y (test mode)
)DOC";
// Opset-7 definition of BatchNormalization: "is_test" is removed, and the
// "spatial" attribute now changes the expected shapes of scale/B/mean/var —
// (C) when spatial, (C x D1 x ... x Dn) when non-spatial.
ONNX_OPERATOR_SET_SCHEMA(
    BatchNormalization,
    7,
    OpSchema()
        .SetDoc(GET_OP_DOC_STR(std::string(BatchNormalization_ver7_doc) + GenerateOptionalArgumentsDoc()))
        // Either 1 output (test mode) or all 5 (training mode).
        .NumOutputs({1, 5})
        .Attr(
            "spatial",
            "If true, compute the mean and variance across per activation. "
            "If false, compute the mean and variance across per feature over "
            "each mini-batch.",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f)
        .Attr(
            "momentum",
            "Factor used in computing the running mean and variance."
            "e.g., running_mean = running_mean * momentum + mean * (1 - momentum).",
            AttributeProto::FLOAT,
            0.9f)
        .Input(
            0,
            "X",
            "Input data tensor from the previous operator; "
            "dimensions for image case are (N x C x H x W), "
            "where N is the batch size, C is the number of "
            "channels, and H and W are the height and the "
            "width of the data. For non image case, the "
            "dimensions are in the form of "
            "(N x C x D1 x D2 ... Dn), where N is the batch "
            "size.",
            "T")
        .Input(
            1,
            "scale",
            "If spatial is true, the dimension of scale is (C). "
            "If spatial is false, the dimensions of scale are "
            "(C x D1 x ... x Dn)",
            "T")
        .Input(
            2,
            "B",
            "If spatial is true, the dimension of bias is (C). "
            "If spatial is false, the dimensions of bias are "
            "(C x D1 x ... x Dn)",
            "T")
        .Input(
            3,
            "mean",
            "If spatial is true, the dimension of the running mean "
            "(training) or the estimated mean (testing) is (C). "
            "If spatial is false, the dimensions of the running mean "
            "(training) or the estimated mean (testing) are (C x D1 x ... x Dn).",
            "T")
        .Input(
            4,
            "var",
            "If spatial is true, the dimension of the running variance"
            "(training) or the estimated variance (testing) is (C). "
            "If spatial is false, the dimensions of the running variance"
            "(training) or the estimated variance (testing) are (C x D1 x ... x Dn).",
            "T")
        .Output(0, "Y", "The output tensor of the same shape as X", "T")
        // Outputs 1-4 are training-mode only.
        .Output(1, "mean", "The running mean after the BatchNormalization operator.", "T", OpSchema::Optional)
        .Output(2, "var", "The running variance after the BatchNormalization operator.", "T", OpSchema::Optional)
        .Output(
            3,
            "saved_mean",
            "Saved mean used during training to speed up gradient "
            "computation.",
            "T",
            OpSchema::Optional)
        .Output(
            4,
            "saved_var",
            "Saved variance used during training to speed up "
            "gradient computation.",
            "T",
            OpSchema::Optional)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)"},
            "Constrain input and output types to float tensors.")
        // Inference only covers Y; training outputs are left uninferred.
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          propagateShapeAndTypeFromFirstInput(ctx);
          // TODO in training mode, it may be possible to infer some of
          // the other outputs as well.
        }))
// Doc text for the opset-18 GroupNormalization schema (frozen; verbatim).
// In this version, per the text below, scale/bias are specified per GROUP of
// channels (the registration using this string follows below).
static const char* GroupNormalization_ver18_doc = R"DOC(
A GroupNormalization function. Carries out group normalization as described in
the paper https://arxiv.org/abs/1803.08494
This operator transforms input according to
```
y = scale * (x - mean) / sqrt(variance + epsilon) + bias,
```
where the mean and variance are computed per instance per group of channels, and
`scale` and `bias` should be specified for each group of channels. The number of
groups `num_groups` should be divisible by the number of channels so that there are
an equal number of channels per group.
When the number of groups is the same as the number of channels, this operator is
equivalent to InstanceNormalization. When there is only one group, this operator
is equivalent to LayerNormalization.
)DOC";
// Registers the GroupNormalization operator schema for opset version 18.
//
// Group normalization splits the C channels of the input into `num_groups`
// groups, normalizes each group with its own mean/variance, then applies a
// learned scale and bias.
//
// NOTE(review): in this opset-18 definition, `scale` and `bias` are declared
// with shape `(num_groups)` — one value per *group*, not per channel (see the
// [1, -1, 1] broadcast reshape in the function body below). Later opset
// revisions changed this to per-channel; confirm against the ONNX changelog
// before reusing this expansion elsewhere.
ONNX_OPERATOR_SET_SCHEMA(
    GroupNormalization,
    18,
    OpSchema()
        .SetDoc(GroupNormalization_ver18_doc)
        // Small constant added to the variance to avoid division by zero.
        .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f)
        .Attr(
            "num_groups",
            "The number of groups of channels. It should be a divisor of the number of channels `C`.",
            AttributeProto::INT,
            true) // `true` marks the attribute as required (no default value)
        .Input(
            0,
            "X",
            "Input data tensor. Dimensions for image cases are `(N x C x H x W)`, where `N` is the batch size, "
            "`C` is the number of channels, and `H` and `W` are the height and width of the data. Statistics are "
            "computed for every group of channels over `C`, `H`, and `W`. For non-image cases, the dimensions are "
            "in the form of `(N x C x D1 x D2 ... Dn)`.",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(
            1,
            "scale",
            "Scale tensor of shape `(num_groups)`.",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Input(
            2,
            "bias",
            "Bias tensor of shape `(num_groups)`.",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .Output(
            0,
            "Y",
            "The output tensor of the same shape as `X`.",
            "T",
            OpSchema::Single,
            true,
            1,
            OpSchema::Differentiable)
        .TypeConstraint(
            "T",
            {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(bfloat16)"},
            "Constrain input and output types to float tensors.")
        // Expands GroupNormalization into primitive ONNX ops so that runtimes
        // without a native kernel can still execute it. "Context dependent"
        // because the expansion needs the input element type and the
        // `num_groups` attribute value at model-build time.
        .SetContextDependentFunctionBodyBuilder(
            [](const FunctionBodyBuildContext& ctx, const OpSchema& schema, FunctionProto& functionProto) {
              // GroupNormalization <epsilon, num_groups> (X, scale, bias) => (Y)
              // Returns false when the function body cannot be built (unknown
              // input type or missing required attribute); no function is
              // registered in that case.
              auto* tp = ctx.getInputType(0);
              if ((tp == nullptr) || (!tp->has_tensor_type()))
                return false;
              // Element type of X; target type for the Cast nodes below.
              int64_t T = tp->tensor_type().elem_type();
              auto* epsilon_attr = ctx.getAttribute("epsilon");
              // Fall back to the schema's documented default when the
              // attribute is absent from the node.
              float epsilon = (epsilon_attr != nullptr) ? epsilon_attr->f() : 1e-5f;
              auto* num_groups_attr = ctx.getAttribute("num_groups");
              if (num_groups_attr == nullptr)
                return false; // required attribute; cannot expand without it
              int64_t num_groups = num_groups_attr->i();
              FunctionBuilder builder(functionProto);
              builder.Const1D("FloatEpsilon", epsilon)
                  .Add("Epsilon = Cast (FloatEpsilon)", "to", T)
                  .Add("XShape = Shape (X)") // shape of input tensor: 1D tensor
                  .Add("C = Shape <start = 1, end = 2> (X)") // channel count as a 1-element tensor
                  .Const1D("NumGroups", num_groups)
                  .Add("GroupSize = Div (C, NumGroups)") // channels per group
                  .Add("N = Shape <start = 0, end = 1> (X)") // batch size
                  .Add("InstanceShape = Shape <start = 2> (X)") // data instance shape
                  // NewShape = [N, num_groups, group_size, H, W, (...)]
                  .Add("NewShape = Concat <axis = 0> (N, NumGroups, GroupSize, InstanceShape)")
                  .Add("XReshaped = Reshape (X, NewShape)")
                  // Flatten into 3D tensor: [N, num_groups, group_size x H x W (x ...)]
                  .Add("Shape3D = Constant <value_ints = [0, 0, -1]> ()")
                  .Add("X3D = Reshape(XReshaped, Shape3D)")
                  // Calculate statistics over the flattened last axis, i.e.
                  // one mean/variance per (batch, group) pair.
                  .Const1D("Axes2", (int64_t)2)
                  .Add("Mean = ReduceMean (X3D, Axes2)")
                  .Add("Square = Mul (X3D, X3D)")
                  .Add("MeanOfSquare = ReduceMean (Square, Axes2)")
                  .Add("SquareOfMean = Mul (Mean, Mean)")
                  // Var = E[X^2] - (E[X])^2 — the single-pass variance identity.
                  .Add("Var = Sub (MeanOfSquare, SquareOfMean)")
                  .Add("VarPlusEpsilon = Add (Var, Epsilon)")
                  .Add("StdDev = Sqrt (VarPlusEpsilon)")
                  .Add("Deviation = Sub (X3D, Mean)")
                  .Add("Normalized = Div (Deviation, StdDev)")
                  // Reshape scale and bias to [1, num_groups, 1] so they
                  // broadcast against the 3D normalized tensor: one scale/bias
                  // value per group in this opset (see schema NOTE above).
                  .Add("ScaleShape = Constant <value_ints = [1, -1, 1]> ()")
                  // NOTE(review): the schema already constrains scale/bias to
                  // the same "T" as X, so these Casts look like no-op
                  // safeguards — confirm intent before removing.
                  .Add("ScaleT = Cast (scale)", "to", T)
                  .Add("BiasT = Cast (bias)", "to", T)
                  .Add("ScaleReshaped = Reshape (ScaleT, ScaleShape)")
                  .Add("BiasReshaped = Reshape (BiasT, ScaleShape)")
                  // Calculate scaled and biased output
                  .Add("Scaled = Mul (ScaleReshaped, Normalized)")
                  .Add("Biased = Add (Scaled, BiasReshaped)")
                  // Restore the original input shape.
                  .Add("Y = Reshape (Biased, XShape)");
              schema.BuildFunction(functionProto);
              return true;
            }));
} // namespace ONNX_NAMESPACE | |