/* | |
* SPDX-License-Identifier: Apache-2.0 | |
*/ | |
namespace ONNX_NAMESPACE { | |
// ArrayFeatureExtractor (ai.onnx.ml, opset 1): gathers elements along the last
// axis of X at the positions given by the int64 indices Y.
static const char* ArrayFeatureExtractor_ver1_doc = R"DOC(
Select elements of the input tensor based on the indices passed.<br>
The indices are applied to the last axes of the tensor.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    ArrayFeatureExtractor,
    1,
    OpSchema()
        .SetDoc(ArrayFeatureExtractor_ver1_doc)
        .Input(0, "X", "Data to be selected", "T")
        .Input(1, "Y", "The indices, based on 0 as the first index of any dimension.", "tensor(int64)")
        .Output(0, "Z", "Selected output data as an array", "T")
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          // Output element type always matches the input element type.
          propagateElemTypeFromInputToOutput(ctx, 0, 0);
          // Shape inference below requires the shape of X (input 0).
          if (!hasNInputShapes(ctx, 1)) {
            return;
          }
          const auto& input_shape = ctx.getInputType(0)->tensor_type().shape();
          const auto input_ndim = input_shape.dim_size();
          // NOTE(review): for rank-1 input the output shape is left unset here
          // (nothing beyond the elem type is inferred) -- presumably intentional.
          if (input_ndim == 1) {
            return;
          }
          auto output_shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
          // This operator only applies to the last dimension; thus -1.
          // All leading dimensions of X are copied through unchanged.
          for (int i = 0; i < input_ndim - 1; ++i) {
            *output_shape->add_dim() = input_shape.dim(i);
          }
          // value of the output's last dimension is the total amount of indices
          // set Unknown length for the last dimension if it cannot be calculated
          auto last_dim = output_shape->add_dim();
          if (hasInputShape(ctx, 1)) {
            const auto& indices_shape = getInputShape(ctx, 1);
            if (indices_shape.dim_size() > 0) {
              // Multiply all concrete dim values together, tracking at most one
              // symbolic (named) dimension along the way.
              int64_t num_indices = 1;
              std::string single_symbolic_dim;
              for (int i = 0; i < indices_shape.dim_size(); i++) {
                if (indices_shape.dim(i).has_dim_value()) {
                  num_indices *= indices_shape.dim(i).dim_value();
                } else if (indices_shape.dim(i).has_dim_param()) {
                  if (single_symbolic_dim.empty()) {
                    // it is possible to set symbolic dimension param if the rest dim values are all
                    // value 1
                    single_symbolic_dim = indices_shape.dim(i).dim_param();
                  } else {
                    // Two or more symbolic dims: product is unknowable; leave
                    // the last dimension with unknown length.
                    return;
                  }
                } else {
                  // Dimension with neither a value nor a param: cannot count.
                  return;
                }
              }
              if (single_symbolic_dim.empty()) {
                last_dim->set_dim_value(num_indices);
              } else if (num_indices == 1) {
                // Every concrete dim was 1, so the single symbolic dim alone
                // determines the number of indices.
                last_dim->set_dim_param(single_symbolic_dim);
              }
            }
          }
        })
        .TypeConstraint(
            "T",
            {"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)", "tensor(string)"},
            "The input must be a tensor of a numeric type or string. The output will be of the same tensor type."));
// Binarizer (ai.onnx.ml, opset 1): element-wise threshold; values > threshold
// map to 1, all others to 0. Type and shape pass through unchanged.
static const char* Binarizer_ver1_doc = R"DOC(
Maps the values of the input tensor to either 0 or 1, element-wise, based on the outcome of a comparison against a threshold value.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    Binarizer,
    1,
    OpSchema()
        .SetDoc(Binarizer_ver1_doc)
        .Input(0, "X", "Data to be binarized", "T")
        .Output(0, "Y", "Binarized output data", "T")
        .TypeConstraint(
            "T",
            {"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
            "The input must be a tensor of a numeric type. The output will be of the same tensor type.")
        .Attr("threshold", "Values greater than this are mapped to 1, others to 0.", AttributeProto::FLOAT, 0.f)
        // Output is always the same type and shape as the input.
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) { propagateShapeAndTypeFromFirstInput(ctx); }));
// CastMap (ai.onnx.ml, opset 1): converts an int64-keyed map into a 1-D tensor,
// ordered by ascending key, in either dense or sparse packing.
static const char* CastMap_ver1_doc = R"DOC(
Converts a map to a tensor.<br>The map key must be an int64 and the values will be ordered
in ascending order based on this key.<br>The operator supports dense packing or sparse packing.
If using sparse packing, the key cannot exceed the max_map-1 value.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    CastMap,
    1,
    OpSchema()
        .SetDoc(CastMap_ver1_doc)
        .Input(0, "X", "The input map that is to be cast to a tensor", "T1")
        .Output(0, "Y", "A tensor representing the same data as the input map, ordered by their keys", "T2")
        .TypeConstraint(
            "T1",
            {"map(int64, string)", "map(int64, float)"},
            "The input must be an integer map to either string or float.")
        .TypeConstraint(
            "T2",
            {"tensor(string)", "tensor(float)", "tensor(int64)"},
            "The output is a 1-D tensor of string, float, or integer.")
        .Attr(
            "cast_to",
            "A string indicating the desired element type of the output tensor, one of 'TO_FLOAT', 'TO_STRING', "
            "'TO_INT64'.",
            AttributeProto::STRING,
            std::string("TO_FLOAT"))
        .Attr(
            "map_form",
            "Indicates whether to only output as many values as are in the input (dense), or position the input based "
            "on using the key of the map as the index of the output (sparse).<br>One of 'DENSE', 'SPARSE'.",
            AttributeProto::STRING,
            std::string("DENSE"))
        .Attr(
            "max_map",
            "If the value of map_form is 'SPARSE,' this attribute indicates the total length of the output tensor.",
            AttributeProto::INT,
            static_cast<int64_t>(1))
        // Only the output element type is inferred here; the 1-D length depends
        // on runtime map contents (dense) or max_map (sparse) and is not set.
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          auto cast_to_attr = ctx.getAttribute("cast_to");
          auto output_type = ctx.getOutputType(0)->mutable_tensor_type();
          // Attribute absent: fall back to its declared default, TO_FLOAT.
          if (nullptr == cast_to_attr) {
            output_type->set_elem_type(TensorProto::FLOAT);
            return;
          }
          auto& cast_to = cast_to_attr->s();
          if (0 == cast_to.compare("TO_FLOAT")) {
            output_type->set_elem_type(TensorProto::FLOAT);
          } else if (0 == cast_to.compare("TO_INT64")) {
            output_type->set_elem_type(TensorProto::INT64);
          } else if (0 == cast_to.compare("TO_STRING")) {
            output_type->set_elem_type(TensorProto::STRING);
          }
          // Any other cast_to value leaves the output elem type unset.
        }));
// CategoryMapper (ai.onnx.ml, opset 1): bidirectional string<->int64 mapping
// driven by the parallel cats_strings / cats_int64s attributes. The direction
// is determined by the input element type; shape passes through unchanged.
static const char* CategoryMapper_ver1_doc = R"DOC(
Converts strings to integers and vice versa.<br>
Two sequences of equal length are used to map between integers and strings,
with strings and integers at the same index detailing the mapping.<br>
Each operator converts either integers to strings or strings to integers, depending
on which default value attribute is provided. Only one default value attribute
should be defined.<br>
If the string default value is set, it will convert integers to strings.
If the int default value is set, it will convert strings to integers.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    CategoryMapper,
    1,
    OpSchema()
        .SetDoc(CategoryMapper_ver1_doc)
        .Input(0, "X", "Input data", "T1")
        .Output(0, "Y", "Output data. If strings are input, the output values are integers, and vice versa.", "T2")
        .TypeConstraint(
            "T1",
            {"tensor(string)", "tensor(int64)"},
            "The input must be a tensor of strings or integers, either [N,C] or [C].")
        .TypeConstraint(
            "T2",
            {"tensor(string)", "tensor(int64)"},
            "The output is a tensor of strings or integers. Its shape will be the same as the input shape.")
        .Attr(
            "cats_strings",
            "The strings of the map. This sequence must be the same length as the 'cats_int64s' sequence",
            AttributeProto::STRINGS,
            OPTIONAL_VALUE)
        .Attr(
            "cats_int64s",
            "The integers of the map. This sequence must be the same length as the 'cats_strings' sequence.",
            AttributeProto::INTS,
            OPTIONAL_VALUE)
        .Attr(
            "default_string",
            "A string to use when an input integer value is not found in the map.<br>One and only one of the "
            "'default_*' attributes must be defined.",
            AttributeProto::STRING,
            std::string("_Unused"))
        .Attr(
            "default_int64",
            "An integer to use when an input string value is not found in the map.<br>One and only one of the "
            "'default_*' attributes must be defined.",
            AttributeProto::INT,
            static_cast<int64_t>(-1))
        .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
          // No input type information: nothing can be inferred.
          if (nullptr == ctx.getInputType(0))
            return;
          // The output type is the "opposite" of the input type:
          // string in -> int64 out, int64 in -> string out.
          auto input_elem_type = ctx.getInputType(0)->tensor_type().elem_type();
          if (TensorProto::STRING == input_elem_type) {
            updateOutputElemType(ctx, 0, TensorProto::INT64);
          } else if (TensorProto::INT64 == input_elem_type) {
            updateOutputElemType(ctx, 0, TensorProto::STRING);
          }
          // The mapping is element-wise, so the shape is unchanged.
          if (hasInputShape(ctx, 0)) {
            propagateShapeFromInputToOutput(ctx, 0, 0);
          }
        }));
static const char* DictVectorizer_ver1_doc = R"DOC( | |
Uses an index mapping to convert a dictionary to an array.<br> | |
Given a dictionary, each key is looked up in the vocabulary attribute corresponding to | |
the key type. The index into the vocabulary array at which the key is found is then | |
used to index the output 1-D tensor 'Y' and insert into it the value found in the dictionary 'X'.<br> | |
The key type of the input map must correspond to the element type of the defined vocabulary attribute. | |
Therefore, the output array will be equal in length to the index mapping vector parameter. | |
All keys in the input dictionary must be present in the index mapping vector. | |
For each item in the input dictionary, insert its value in the output array. | |
Any keys not present in the input dictionary, will be zero in the output array.<br> | |
For example: if the ``string_vocabulary`` parameter is set to ``["a", "c", "b", "z"]``, | |
then an input of ``{"a": 4, "c": 8}`` will produce an output of ``[4, 8, 0, 0]``. | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
DictVectorizer, | |
1, | |
OpSchema() | |
.SetDoc(DictVectorizer_ver1_doc) | |
.Input(0, "X", "A dictionary.", "T1") | |
.Output(0, "Y", "A 1-D tensor holding values from the input dictionary.", "T2") | |
.TypeConstraint( | |
"T1", | |
{"map(string, int64)", | |
"map(int64, string)", | |
"map(int64, float)", | |
"map(int64, double)", | |
"map(string, float)", | |
"map(string, double)"}, | |
"The input must be a map from strings or integers to either strings or a numeric type. The key and value " | |
"types cannot be the same.") | |
.TypeConstraint( | |
"T2", | |
{"tensor(int64)", "tensor(float)", "tensor(double)", "tensor(string)"}, | |
"The output will be a tensor of the value type of the input map. It's shape will be [1,C], where C is the " | |
"length of the input dictionary.") | |
.Attr( | |
"string_vocabulary", | |
"A string vocabulary array.<br>One and only one of the vocabularies must be defined.", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"int64_vocabulary", | |
"An integer vocabulary array.<br>One and only one of the vocabularies must be defined.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
auto input_elem_type = ctx.getInputType(0)->map_type().value_type().tensor_type().elem_type(); | |
auto output_elem_type = ctx.getOutputType(0)->mutable_tensor_type(); | |
output_elem_type->set_elem_type(input_elem_type); | |
})); | |
// FeatureVectorizer (ai.onnx.ml, opset 1): concatenates the variadic numeric
// inputs along the second dimension into a single float tensor.
static const char* FeatureVectorizer_ver1_doc = R"DOC(
Concatenates input tensors into one continuous output.<br>
All input shapes are 2-D and are concatenated along the second dimension. 1-D tensors are treated as [1,C].
Inputs are copied to the output maintaining the order of the input arguments.<br>
All inputs must be integers or floats, while the output will be all floating point values.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    FeatureVectorizer,
    1,
    OpSchema()
        .SetDoc(FeatureVectorizer_ver1_doc)
        .Input(0, "X", "An ordered collection of tensors, all with the same element type.", "T1", OpSchema::Variadic)
        .Output(0, "Y", "The output array, elements ordered as the inputs.", "tensor(float)")
        .TypeConstraint(
            "T1",
            {"tensor(int32)", "tensor(int64)", "tensor(float)", "tensor(double)"},
            "The input type must be a tensor of a numeric type.")
        .Attr("inputdimensions", "The size of each input in the input list", AttributeProto::INTS, OPTIONAL_VALUE));
// Imputer (ai.onnx.ml, opset 1): replaces occurrences of replaced_value_* with
// the corresponding imputed_value_*; the float/int attribute pair chosen must
// match the input element type.
static const char* Imputer_ver1_doc = R"DOC(
Replaces inputs that equal one value with another, leaving all other elements alone.<br>
This operator is typically used to replace missing values in situations where they have a canonical
representation, such as -1, 0, NaN, or some extreme value.<br>
One and only one of imputed_value_floats or imputed_value_int64s should be defined -- floats if the input tensor
holds floats, integers if the input tensor holds integers. The imputed values must all fit within the
width of the tensor element type. One and only one of the replaced_value_float or replaced_value_int64 should be defined,
which one depends on whether floats or integers are being processed.<br>
The imputed_value attribute length can be 1 element, or it can have one element per input feature.<br>In other words, if the input tensor has the shape [*,F], then the length of the attribute array may be 1 or F. If it is 1, then it is broadcast along the last dimension and applied to each feature.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    Imputer,
    1,
    OpSchema()
        .SetDoc(Imputer_ver1_doc)
        .Input(0, "X", "Data to be processed.", "T")
        .Output(0, "Y", "Imputed output data", "T")
        .TypeConstraint(
            "T",
            {"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
            "The input type must be a tensor of a numeric type, either [N,C] or [C]. The output type will be of the "
            "same tensor type and shape.")
        .Attr("imputed_value_floats", "Value(s) to change to", AttributeProto::FLOATS, OPTIONAL_VALUE)
        .Attr("replaced_value_float", "A value that needs replacing.", AttributeProto::FLOAT, 0.f)
        .Attr("imputed_value_int64s", "Value(s) to change to.", AttributeProto::INTS, OPTIONAL_VALUE)
        .Attr("replaced_value_int64", "A value that needs replacing.", AttributeProto::INT, static_cast<int64_t>(0)));
static const char* LabelEncoder_ver4_doc = R"DOC( | |
Maps each element in the input tensor to another value.<br> | |
The mapping is determined by the two parallel attributes, 'keys_*' and | |
'values_*' attribute. The i-th value in the specified 'keys_*' attribute | |
would be mapped to the i-th value in the specified 'values_*' attribute. It | |
implies that input's element type and the element type of the specified | |
'keys_*' should be identical while the output type is identical to the | |
specified 'values_*' attribute. Note that the 'keys_*' and 'values_*' attributes | |
must have the same length. If an input element can not be found in the | |
specified 'keys_*' attribute, the 'default_*' that matches the specified | |
'values_*' attribute may be used as its output value. The type of the 'default_*' | |
attribute must match the 'values_*' attribute chosen. <br> | |
Let's consider an example which maps a string tensor to an integer tensor. | |
Assume and 'keys_strings' is ["Amy", "Sally"], 'values_int64s' is [5, 6], | |
and 'default_int64' is '-1'. The input ["Dori", "Amy", "Amy", "Sally", | |
"Sally"] would be mapped to [-1, 5, 5, 6, 6].<br> | |
Since this operator is an one-to-one mapping, its input and output shapes | |
are the same. Notice that only one of 'keys_*'/'values_*' can be set.<br> | |
Float keys with value 'NaN' match any input 'NaN' value regardless of bit | |
value. If a key is repeated, the last key takes precedence. | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
LabelEncoder, | |
4, | |
OpSchema() | |
.SetDoc(LabelEncoder_ver4_doc) | |
.Input(0, "X", "Input data. It must have the same element type as the keys_* attribute set.", "T1") | |
.Output(0, "Y", "Output data. This tensor's element type is based on the values_* attribute set.", "T2") | |
.TypeConstraint( | |
"T1", | |
{"tensor(string)", "tensor(int64)", "tensor(float)", "tensor(int32)", "tensor(int16)", "tensor(double)"}, | |
"The input type is a tensor of any shape.") | |
.TypeConstraint( | |
"T2", | |
{"tensor(string)", "tensor(int64)", "tensor(float)", "tensor(int32)", "tensor(int16)", "tensor(double)"}, | |
"Output type is determined by the specified 'values_*' attribute.") | |
.Attr( | |
"keys_tensor", | |
"Keys encoded as a 1D tensor. One and only one of 'keys_*'s should be set.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr("keys_strings", "A list of strings.", AttributeProto::STRINGS, OPTIONAL_VALUE) | |
.Attr("keys_int64s", "A list of ints.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("keys_floats", "A list of floats.", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr( | |
"values_tensor", | |
"Values encoded as a 1D tensor. One and only one of 'values_*'s should be set.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr("values_strings", "A list of strings.", AttributeProto::STRINGS, OPTIONAL_VALUE) | |
.Attr("values_int64s", "A list of ints.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("values_floats", "A list of floats.", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr("default_string", "A string.", AttributeProto::STRING, std::string("_Unused")) | |
.Attr("default_int64", "An integer.", AttributeProto::INT, static_cast<int64_t>(-1)) | |
.Attr("default_float", "A float.", AttributeProto::FLOAT, -0.f) | |
.Attr( | |
"default_tensor", | |
"A default tensor. {\"_Unused\"} if values_* has string type, {-1} if values_* has integral type, and " | |
"{-0.f} if values_* has float type.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
int key_length, key_type; | |
std::tie(key_type, key_length) = | |
getAttributeElementTypeAndLength(ctx, {"keys_tensor", "keys_strings", "keys_int64s", "keys_floats"}); | |
if (key_type == TensorProto::UNDEFINED) { | |
fail_shape_inference("At least one of keys_tensor, keys_strings, keys_int64s, keys_floats must be set."); | |
} | |
if (key_type != ctx.getInputType(0)->tensor_type().elem_type()) { | |
fail_shape_inference( | |
"The input type was ", | |
ctx.getInputType(0)->tensor_type().elem_type(), | |
" and the key type ", | |
key_type, | |
" are different, which is not permitted for LabelEncoders."); | |
} | |
int value_length, value_type; | |
std::tie(value_type, value_length) = getAttributeElementTypeAndLength( | |
ctx, {"values_tensor", "values_strings", "values_int64s", "values_floats"}); | |
if (value_type == TensorProto::UNDEFINED) { | |
fail_shape_inference( | |
"At least one of values_tensor, values_strings, values_int64s, values_floats must be set."); | |
} | |
if (value_length != key_length) { | |
fail_shape_inference( | |
"The number of keys ", | |
key_length, | |
" and the number of values ", | |
value_length, | |
" must be the same in the LabelEncoder."); | |
} | |
auto default_attr = ctx.getAttribute("default_tensor"); | |
if (nullptr != default_attr && default_attr->has_t() && default_attr->t().has_data_type() && | |
default_attr->t().data_type() != TensorProto_DataType_UNDEFINED) { | |
auto default_tensor = default_attr->t(); | |
if (default_tensor.data_type() != value_type) { | |
fail_shape_inference( | |
"The default tensor type ", | |
default_tensor.data_type(), | |
" and the value type ", | |
value_type, | |
" must be the same in the LabelEncoder."); | |
} | |
if (1 != default_tensor.dims_size() || 1 != default_tensor.dims(0)) { | |
fail_shape_inference("The default tensor must be a singleton 1D tensor."); | |
} | |
} | |
// Propagate shape from input type and assign output type based on value type | |
ctx.getOutputType(0)->mutable_tensor_type()->set_elem_type(value_type); | |
propagateShapeFromInputToOutput(ctx, 0, 0); | |
})); | |
static const char* LinearClassifier_ver1_doc = R"DOC( | |
Linear classifier | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
LinearClassifier, | |
1, | |
OpSchema() | |
.SetDoc(LinearClassifier_ver1_doc) | |
.Input(0, "X", "Data to be classified.", "T1") | |
.Output(0, "Y", "Classification outputs (one class per example).", "T2") | |
.Output(1, "Z", "Classification scores ([N,E] - one score for each class and example", "tensor(float)") | |
.TypeConstraint( | |
"T1", | |
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"}, | |
"The input must be a tensor of a numeric type, and of shape [N,C] or [C]. In the latter case, it will be " | |
"treated as [1,C]") | |
.TypeConstraint( | |
"T2", | |
{"tensor(string)", "tensor(int64)"}, | |
"The output will be a tensor of strings or integers.") | |
.Attr("coefficients", "A collection of weights of the model(s).", AttributeProto::FLOATS) | |
.Attr("intercepts", "A collection of intercepts.", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr( | |
"multi_class", | |
"Indicates whether to do OvR or multinomial (0=OvR is the default).", | |
AttributeProto::INT, | |
static_cast<int64_t>(0)) | |
.Attr( | |
"classlabels_strings", | |
"Class labels when using string labels. One and only one 'classlabels' attribute must be defined.", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"classlabels_ints", | |
"Class labels when using integer labels. One and only one 'classlabels' attribute must be defined.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"post_transform", | |
"Indicates the transform to apply to the scores vector.<br>One of 'NONE,' 'SOFTMAX,' 'LOGISTIC,' " | |
"'SOFTMAX_ZERO,' or 'PROBIT'", | |
AttributeProto::STRING, | |
std::string("NONE")) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
std::vector<std::string> label_strs; | |
std::vector<int64_t> label_ints; | |
auto labels_strings_present = getRepeatedAttribute(ctx, "classlabels_strings", label_strs); | |
bool using_strings = (labels_strings_present && !label_strs.empty()); | |
if (!using_strings) { | |
getRepeatedAttribute(ctx, "classlabels_ints", label_ints); | |
} | |
// Type inference | |
auto* output_elem_type = ctx.getOutputType(0)->mutable_tensor_type(); | |
if (using_strings) { | |
output_elem_type->set_elem_type(TensorProto::STRING); | |
} else { | |
output_elem_type->set_elem_type(TensorProto::INT64); | |
} | |
// second output is always of float type | |
ctx.getOutputType(1)->mutable_tensor_type()->set_elem_type(TensorProto::FLOAT); | |
// Shape/Rank inference begins | |
// establish the number of classes | |
std::vector<float> intercepts; | |
getRepeatedAttribute(ctx, "intercepts", intercepts); | |
int class_count = static_cast<int>(intercepts.size()); | |
if (intercepts.size() == 1 && | |
((using_strings && label_strs.size() == 2) || (!using_strings && label_ints.size() == 2))) { | |
class_count = 2; | |
} | |
TensorShapeProto_Dimension batch_size_dim, class_count_dim; | |
class_count_dim.set_dim_value(class_count); | |
if (hasNInputShapes(ctx, 1)) { | |
const auto& input_shape = ctx.getInputType(0)->tensor_type().shape(); | |
const auto input_rank = input_shape.dim_size(); | |
if (input_rank == 1) { | |
// if input_rank is 1, batch_size is interpreted to be 1 | |
batch_size_dim.set_dim_value(1); | |
} else if (input_rank == 2) { | |
batch_size_dim = input_shape.dim((int)0); | |
} else { | |
fail_shape_inference("Input's shape should be 1D or 2D"); | |
} | |
} | |
updateOutputShape(ctx, 0, {batch_size_dim}); | |
updateOutputShape(ctx, 1, {batch_size_dim, class_count_dim}); | |
})); | |
// LinearRegressor (ai.onnx.ml, opset 1): generalized linear regression with
// optional multi-target coefficients and an optional post-transform.
static const char* LinearRegressor_ver1_doc = R"DOC(
Generalized linear regression evaluation.<br>
If targets is set to 1 (default) then univariate regression is performed.<br>
If targets is set to M then M sets of coefficients must be passed in as a sequence
and M results will be output for each input n in N.<br>
The coefficients array is of length n, and the coefficients for each target are contiguous.
Intercepts are optional but if provided must match the number of targets.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    LinearRegressor,
    1,
    OpSchema()
        .SetDoc(LinearRegressor_ver1_doc)
        .Input(0, "X", "Data to be regressed.", "T")
        .Output(0, "Y", "Regression outputs (one per target, per example).", "tensor(float)")
        .TypeConstraint(
            "T",
            {"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
            "The input must be a tensor of a numeric type.")
        .Attr(
            "post_transform",
            "Indicates the transform to apply to the regression output vector.<br>One of 'NONE,' 'SOFTMAX,' "
            "'LOGISTIC,' 'SOFTMAX_ZERO,' or 'PROBIT'",
            AttributeProto::STRING,
            std::string("NONE"))
        .Attr("coefficients", "Weights of the model(s).", AttributeProto::FLOATS, OPTIONAL_VALUE)
        .Attr("intercepts", "Weights of the intercepts, if used.", AttributeProto::FLOATS, OPTIONAL_VALUE)
        .Attr(
            "targets",
            "The total number of regression targets, 1 if not defined.",
            AttributeProto::INT,
            static_cast<int64_t>(1)));
static const char* Normalizer_ver1_doc = R"DOC( | |
Normalize the input. There are three normalization modes, which have the corresponding formulas, | |
defined using element-wise infix operators '/' and '^' and tensor-wide functions 'max' and 'sum':<br> | |
<br> | |
Max: Y = X / max(X)<br> | |
L1: Y = X / sum(X)<br> | |
L2: Y = sqrt(X^2 / sum(X^2)}<br> | |
In all modes, if the divisor is zero, Y == X. | |
<br> | |
For batches, that is, [N,C] tensors, normalization is done along the C axis. In other words, each row | |
of the batch is normalized independently. | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
Normalizer, | |
1, | |
OpSchema() | |
.SetDoc(Normalizer_ver1_doc) | |
.Input(0, "X", "Data to be encoded, a tensor of shape [N,C] or [C]", "T") | |
.Output(0, "Y", "Encoded output data", "tensor(float)") | |
.TypeConstraint( | |
"T", | |
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"}, | |
"The input must be a tensor of a numeric type.") | |
.Attr("norm", "One of 'MAX,' 'L1,' 'L2'", AttributeProto::STRING, std::string("MAX"))); | |
static const char* OneHotEncoder_ver1_doc = R"DOC( | |
Replace each input element with an array of ones and zeros, where a single | |
one is placed at the index of the category that was passed in. The total category count | |
will determine the size of the extra dimension of the output array Y.<br> | |
For example, if we pass a tensor with a single value of 4, and a category count of 8, | |
the output will be a tensor with ``[0,0,0,0,1,0,0,0]``.<br> | |
This operator assumes every input feature is from the same set of categories.<br> | |
If the input is a tensor of float, int32, or double, the data will be cast | |
to integers and the cats_int64s category list will be used for the lookups. | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
OneHotEncoder, | |
1, | |
OpSchema() | |
.SetDoc(OneHotEncoder_ver1_doc) | |
.Input(0, "X", "Data to be encoded.", "T") | |
.Output(0, "Y", "Encoded output data, having one more dimension than X.", "tensor(float)") | |
.TypeConstraint( | |
"T", | |
{"tensor(string)", "tensor(int64)", "tensor(int32)", "tensor(float)", "tensor(double)"}, | |
"The input must be a tensor of a numeric type.") | |
.Attr( | |
"cats_int64s", | |
"List of categories, ints.<br>One and only one of the 'cats_*' attributes must be defined.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"cats_strings", | |
"List of categories, strings.<br>One and only one of the 'cats_*' attributes must be defined.", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"zeros", | |
"If true and category is not present, will return all zeros; if false and a category if not found, the " | |
"operator will fail.", | |
AttributeProto::INT, | |
static_cast<int64_t>(1)) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
std::vector<int64_t> cats_int64s; | |
bool has_int64s = getRepeatedAttribute(ctx, "cats_int64s", cats_int64s); | |
std::vector<std::string> cats_strings; | |
bool has_strings = getRepeatedAttribute(ctx, "cats_strings", cats_strings); | |
if (has_int64s == has_strings) { | |
fail_shape_inference("Exactly one of 'cats_*' attributes must be provided."); | |
} | |
const TensorShapeProto& input_shape = ctx.getInputType(0)->tensor_type().shape(); | |
TensorShapeProto* shape = ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape(); | |
for (int i = 0; i < input_shape.dim_size(); i++) { | |
*shape->add_dim() = input_shape.dim(i); | |
} | |
shape->add_dim()->set_dim_value(std::max(cats_int64s.size(), cats_strings.size())); | |
updateOutputElemType(ctx, 0, TensorProto::FLOAT); | |
})); | |
// Scaler (ai.onnx.ml, opset 1): computes (X - offset) * scale per feature.
static const char* Scaler_ver1_doc = R"DOC(
Rescale input data, for example to standardize features by removing the mean and scaling to unit variance.
)DOC";

ONNX_ML_OPERATOR_SET_SCHEMA(
    Scaler,
    1,
    OpSchema()
        .SetDoc(Scaler_ver1_doc)
        .Input(0, "X", "Data to be scaled.", "T")
        .Output(0, "Y", "Scaled output data.", "tensor(float)")
        .TypeConstraint(
            "T",
            {"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
            "The input must be a tensor of a numeric type.")
        .Attr(
            "offset",
            "First, offset by this.<br>Can be length of features in an [N,F] tensor or length 1, in which case it "
            "applies to all features, regardless of dimension count.",
            AttributeProto::FLOATS,
            OPTIONAL_VALUE)
        .Attr(
            "scale",
            "Second, multiply by this.<br>Can be length of features in an [N,F] tensor or length 1, in which case it "
            "applies to all features, regardless of dimension count.<br>Must be same length as 'offset'",
            AttributeProto::FLOATS,
            OPTIONAL_VALUE));
static const char* SVMClassifier_ver1_doc = R"DOC( | |
Support Vector Machine classifier | |
)DOC"; | |
ONNX_ML_OPERATOR_SET_SCHEMA( | |
SVMClassifier, | |
1, | |
OpSchema() | |
.SetDoc(SVMClassifier_ver1_doc) | |
.Input(0, "X", "Data to be classified.", "T1") | |
.Output(0, "Y", "Classification outputs (one class per example).", "T2") | |
.Output( | |
1, | |
"Z", | |
"Class scores (one per class per example), if prob_a and prob_b are provided they are probabilities for " | |
"each class, otherwise they are raw scores.", | |
"tensor(float)") | |
.TypeConstraint( | |
"T1", | |
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"}, | |
"The input must be a tensor of a numeric type, either [C] or [N,C].") | |
.TypeConstraint( | |
"T2", | |
{"tensor(string)", "tensor(int64)"}, | |
"The output type will be a tensor of strings or integers, depending on which of the classlabels_* " | |
"attributes is used. Its size will match the bactch size of the input.") | |
.Attr( | |
"kernel_type", | |
"The kernel type, one of 'LINEAR,' 'POLY,' 'RBF,' 'SIGMOID'.", | |
AttributeProto::STRING, | |
std::string("LINEAR")) | |
.Attr( | |
"kernel_params", | |
"List of 3 elements containing gamma, coef0, and degree, in that order. Zero if unused for the kernel.", | |
AttributeProto::FLOATS, | |
OPTIONAL_VALUE) | |
.Attr("vectors_per_class", "", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("support_vectors", "", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr("coefficients", "", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr("prob_a", "First set of probability coefficients.", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr( | |
"prob_b", | |
"Second set of probability coefficients. This array must be same size as prob_a.<br>If these are provided " | |
"then output Z are probability estimates, otherwise they are raw scores.", | |
AttributeProto::FLOATS, | |
OPTIONAL_VALUE) | |
.Attr("rho", "", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr( | |
"post_transform", | |
"Indicates the transform to apply to the score. <br>One of 'NONE,' 'SOFTMAX,' 'LOGISTIC,' 'SOFTMAX_ZERO,' " | |
"or 'PROBIT'", | |
AttributeProto::STRING, | |
std::string("NONE")) | |
.Attr( | |
"classlabels_strings", | |
"Class labels if using string labels.<br>One and only one of the 'classlabels_*' attributes must be " | |
"defined.", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"classlabels_ints", | |
"Class labels if using integer labels.<br>One and only one of the 'classlabels_*' attributes must be " | |
"defined.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
std::vector<std::string> label_strs; | |
auto result = getRepeatedAttribute(ctx, "classlabels_strings", label_strs); | |
bool using_strings = (result && !label_strs.empty()); | |
auto output_elem_type = ctx.getOutputType(0)->mutable_tensor_type(); | |
if (using_strings) { | |
output_elem_type->set_elem_type(TensorProto::STRING); | |
} else { | |
output_elem_type->set_elem_type(TensorProto::INT64); | |
} | |
})); | |
// Documentation string for the SVMRegressor (ai.onnx.ml opset 1) operator.
static const char* SVMRegressor_ver1_doc = R"DOC(
Support Vector Machine regression prediction and one-class SVM anomaly detection.
)DOC";
// Schema registration for SVMRegressor: one numeric input of shape [C] or [N,C],
// one float output with one score per target per example. The trained model
// (kernel choice, support vectors, coefficients) is carried entirely in
// attributes; no TypeAndShapeInferenceFunction is attached, so only the default
// type/shape propagation applies.
ONNX_ML_OPERATOR_SET_SCHEMA(
SVMRegressor,
1,
OpSchema()
.SetDoc(SVMRegressor_ver1_doc)
.Input(0, "X", "Data to be regressed.", "T")
.Output(0, "Y", "Regression outputs (one score per target per example).", "tensor(float)")
.TypeConstraint(
"T",
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
"The input type must be a tensor of a numeric type, either [C] or [N,C].")
.Attr(
"kernel_type",
"The kernel type, one of 'LINEAR,' 'POLY,' 'RBF,' 'SIGMOID'.",
AttributeProto::STRING,
std::string("LINEAR"))
.Attr(
"kernel_params",
"List of 3 elements containing gamma, coef0, and degree, in that order. Zero if unused for the kernel.",
AttributeProto::FLOATS,
OPTIONAL_VALUE)
.Attr("support_vectors", "Chosen support vectors", AttributeProto::FLOATS, OPTIONAL_VALUE)
.Attr(
"one_class",
"Flag indicating whether the regression is a one-class SVM or not.",
AttributeProto::INT,
static_cast<int64_t>(0))
.Attr("coefficients", "Support vector coefficients.", AttributeProto::FLOATS, OPTIONAL_VALUE)
.Attr("n_supports", "The number of support vectors.", AttributeProto::INT, static_cast<int64_t>(0))
.Attr(
"post_transform",
"Indicates the transform to apply to the score. <br>One of 'NONE,' 'SOFTMAX,' 'LOGISTIC,' 'SOFTMAX_ZERO,' "
"or 'PROBIT.'",
AttributeProto::STRING,
std::string("NONE"))
.Attr("rho", "", AttributeProto::FLOATS, OPTIONAL_VALUE));
// Documentation string for the deprecated TreeEnsembleClassifier (ai.onnx.ml
// opset 5) operator. Fix: "TreeEnsemble with provides" -> "TreeEnsemble which
// provides" (this text is emitted verbatim into generated operator docs).
static const char* TreeEnsembleClassifier_ver5_doc = R"DOC(
This operator is DEPRECATED. Please use TreeEnsemble which provides similar functionality.
In order to determine the top class, the ArgMax node can be applied to the output of TreeEnsemble.
To encode class labels, use a LabelEncoder operator.
Tree Ensemble classifier. Returns the top class for each of N inputs.<br>
The attributes named 'nodes_X' form a sequence of tuples, associated by
index into the sequences, which must all be of equal length. These tuples
define the nodes.<br>
Similarly, all fields prefixed with 'class_' are tuples of votes at the leaves.
A leaf may have multiple votes, where each vote is weighted by
the associated class_weights index.<br>
One and only one of classlabels_strings or classlabels_int64s
will be defined. The class_ids are indices into this list.
All fields ending with <i>_as_tensor</i> can be used instead of the
same parameter without the suffix if the element type is double and not float.
)DOC";
ONNX_ML_OPERATOR_SET_SCHEMA( | |
TreeEnsembleClassifier, | |
5, | |
OpSchema() | |
.Deprecate() | |
.SetDoc(TreeEnsembleClassifier_ver5_doc) | |
.Input(0, "X", "Input of shape [N,F]", "T1") | |
.Output(0, "Y", "N, Top class for each point", "T2") | |
.Output(1, "Z", "The class score for each class, for each point, a tensor of shape [N,E].", "tensor(float)") | |
.TypeConstraint( | |
"T1", | |
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"}, | |
"The input type must be a tensor of a numeric type.") | |
.TypeConstraint( | |
"T2", | |
{"tensor(string)", "tensor(int64)"}, | |
"The output type will be a tensor of strings or integers, depending on which of the classlabels_* " | |
"attributes is used.") | |
.Attr("nodes_treeids", "Tree id for each node.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr( | |
"nodes_nodeids", | |
"Node id for each node. Ids may restart at zero for each tree, but it not required to.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr("nodes_featureids", "Feature id for each node.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr( | |
"nodes_values", | |
"Thresholds to do the splitting on for each node.", | |
AttributeProto::FLOATS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"nodes_values_as_tensor", | |
"Thresholds to do the splitting on for each node.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr( | |
"nodes_hitrates", | |
"Popularity of each node, used for performance and may be omitted.", | |
AttributeProto::FLOATS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"nodes_hitrates_as_tensor", | |
"Popularity of each node, used for performance and may be omitted.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr( | |
"nodes_modes", | |
"The node kind, that is, the comparison to make at the node. There is no comparison to make at a leaf " | |
"node.<br>One of 'BRANCH_LEQ', 'BRANCH_LT', 'BRANCH_GTE', 'BRANCH_GT', 'BRANCH_EQ', 'BRANCH_NEQ', 'LEAF'", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr("nodes_truenodeids", "Child node if expression is true.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("nodes_falsenodeids", "Child node if expression is false.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr( | |
"nodes_missing_value_tracks_true", | |
"For each node, define what to do in the presence of a missing value: if a value is missing (NaN), use the " | |
"'true' or 'false' branch based on the value in this array.<br>This attribute may be left undefined, and " | |
"the default value is false (0) for all nodes.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr("class_treeids", "The id of the tree that this node is in.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("class_nodeids", "node id that this weight is for.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("class_ids", "The index of the class list that each weight is for.", AttributeProto::INTS, OPTIONAL_VALUE) | |
.Attr("class_weights", "The weight for the class in class_id.", AttributeProto::FLOATS, OPTIONAL_VALUE) | |
.Attr( | |
"class_weights_as_tensor", | |
"The weight for the class in class_id.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr( | |
"classlabels_strings", | |
"Class labels if using string labels.<br>One and only one of the 'classlabels_*' attributes must be " | |
"defined.", | |
AttributeProto::STRINGS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"classlabels_int64s", | |
"Class labels if using integer labels.<br>One and only one of the 'classlabels_*' attributes must be " | |
"defined.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"post_transform", | |
"Indicates the transform to apply to the score. <br> One of 'NONE,' 'SOFTMAX,' 'LOGISTIC,' 'SOFTMAX_ZERO,' " | |
"or 'PROBIT.'", | |
AttributeProto::STRING, | |
std::string("NONE")) | |
.Attr( | |
"base_values", | |
"Base values for classification, added to final class score; the size must be the same as the classes or " | |
"can be left unassigned (assumed 0)", | |
AttributeProto::FLOATS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"base_values_as_tensor", | |
"Base values for classification, added to final class score; the size must be the same as the classes or " | |
"can be left unassigned (assumed 0)", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE)); | |
// Documentation string for the deprecated TreeEnsembleRegressor (ai.onnx.ml
// opset 5) operator.
static const char* TreeEnsembleRegressor_ver5_doc = R"DOC(
This operator is DEPRECATED. Please use TreeEnsemble instead which provides the same
functionality.<br>
Tree Ensemble regressor. Returns the regressed values for each input in N.<br>
All args with nodes_ are fields of a tuple of tree nodes, and
it is assumed they are the same length, and an index i will decode the
tuple across these inputs. Each node id can appear only once
for each tree id.<br>
All fields prefixed with target_ are tuples of votes at the leaves.<br>
A leaf may have multiple votes, where each vote is weighted by
the associated target_weights index.<br>
All fields ending with <i>_as_tensor</i> can be used instead of the
same parameter without the suffix if the element type is double and not float.
All trees must have their node ids start at 0 and increment by 1.<br>
Mode enum is BRANCH_LEQ, BRANCH_LT, BRANCH_GTE, BRANCH_GT, BRANCH_EQ, BRANCH_NEQ, LEAF
)DOC";
// Deprecated schema registration for TreeEnsembleRegressor (opset 5). Kept so
// that older models still load; superseded by the TreeEnsemble operator. The
// ensemble structure (node tuples, leaf votes) is encoded purely in attributes.
ONNX_ML_OPERATOR_SET_SCHEMA(
TreeEnsembleRegressor,
5,
OpSchema()
.Deprecate()
.SetDoc(TreeEnsembleRegressor_ver5_doc)
.Input(0, "X", "Input of shape [N,F]", "T")
.Output(0, "Y", "N classes", "tensor(float)")
.TypeConstraint(
"T",
{"tensor(float)", "tensor(double)", "tensor(int64)", "tensor(int32)"},
"The input type must be a tensor of a numeric type.")
.Attr("nodes_treeids", "Tree id for each node.", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr(
"nodes_nodeids",
"Node id for each node. Node ids must restart at zero for each tree and increase sequentially.",
AttributeProto::INTS,
OPTIONAL_VALUE)
.Attr("nodes_featureids", "Feature id for each node.", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr(
"nodes_values",
"Thresholds to do the splitting on for each node.",
AttributeProto::FLOATS,
OPTIONAL_VALUE)
.Attr(
"nodes_values_as_tensor",
"Thresholds to do the splitting on for each node.",
AttributeProto::TENSOR,
OPTIONAL_VALUE)
.Attr(
"nodes_hitrates",
"Popularity of each node, used for performance and may be omitted.",
AttributeProto::FLOATS,
OPTIONAL_VALUE)
.Attr(
"nodes_hitrates_as_tensor",
"Popularity of each node, used for performance and may be omitted.",
AttributeProto::TENSOR,
OPTIONAL_VALUE)
.Attr(
"nodes_modes",
"The node kind, that is, the comparison to make at the node. There is no comparison to make at a leaf "
"node.<br>One of 'BRANCH_LEQ', 'BRANCH_LT', 'BRANCH_GTE', 'BRANCH_GT', 'BRANCH_EQ', 'BRANCH_NEQ', 'LEAF'",
AttributeProto::STRINGS,
OPTIONAL_VALUE)
.Attr("nodes_truenodeids", "Child node if expression is true", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr("nodes_falsenodeids", "Child node if expression is false", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr(
"nodes_missing_value_tracks_true",
"For each node, define what to do in the presence of a NaN: use the 'true' (if the attribute value is 1) "
"or 'false' (if the attribute value is 0) branch based on the value in this array.<br>This attribute may "
"be left undefined and the default value is false (0) for all nodes.",
AttributeProto::INTS,
OPTIONAL_VALUE)
.Attr("target_treeids", "The id of the tree that each node is in.", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr("target_nodeids", "The node id of each weight", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr("target_ids", "The index of the target that each weight is for", AttributeProto::INTS, OPTIONAL_VALUE)
.Attr("target_weights", "The weight for each target", AttributeProto::FLOATS, OPTIONAL_VALUE)
.Attr("target_weights_as_tensor", "The weight for each target", AttributeProto::TENSOR, OPTIONAL_VALUE)
.Attr("n_targets", "The total number of targets.", AttributeProto::INT, OPTIONAL_VALUE)
.Attr(
"post_transform",
"Indicates the transform to apply to the score. <br>One of 'NONE,' 'SOFTMAX,' 'LOGISTIC,' 'SOFTMAX_ZERO,' "
"or 'PROBIT'",
AttributeProto::STRING,
std::string("NONE"))
.Attr(
"aggregate_function",
"Defines how to aggregate leaf values within a target. <br>One of 'AVERAGE,' 'SUM,' 'MIN,' 'MAX.'",
AttributeProto::STRING,
std::string("SUM"))
.Attr(
"base_values",
"Base values for regression, added to final prediction after applying aggregate_function; the size must be "
"the same as the classes or can be left unassigned (assumed 0)",
AttributeProto::FLOATS,
OPTIONAL_VALUE)
.Attr(
"base_values_as_tensor",
"Base values for regression, added to final prediction after applying aggregate_function; the size must be "
"the same as the classes or can be left unassigned (assumed 0)",
AttributeProto::TENSOR,
OPTIONAL_VALUE));
// Documentation string for the TreeEnsemble (ai.onnx.ml opset 5) operator,
// the unified replacement for TreeEnsembleRegressor / TreeEnsembleClassifier.
static const char* TreeEnsemble_ver5_doc = R"DOC(
Tree Ensemble operator. Returns the regressed values for each input in a batch.
Inputs have dimensions `[N, F]` where `N` is the input batch size and `F` is the number of input features.
Outputs have dimensions `[N, num_targets]` where `N` is the batch size and `num_targets` is the number of targets, which is a configurable attribute.
The encoding of this attribute is split along interior nodes and the leaves of the trees. Notably, attributes with the prefix `nodes_*` are associated with interior nodes, and attributes with the prefix `leaf_*` are associated with leaves.
The attributes `nodes_*` must all have the same length and encode a sequence of tuples, as defined by taking all the `nodes_*` fields at a given position.
All fields prefixed with `leaf_*` represent tree leaves, and similarly define tuples of leaves and must have identical length.
This operator can be used to implement both the previous `TreeEnsembleRegressor` and `TreeEnsembleClassifier` nodes.
The `TreeEnsembleRegressor` node maps directly to this node and requires changing how the nodes are represented.
The `TreeEnsembleClassifier` node can be implemented by adding a `ArgMax` node after this node to determine the top class.
To encode class labels, a `LabelEncoder` or `GatherND` operator may be used.
)DOC";
ONNX_ML_OPERATOR_SET_SCHEMA( | |
TreeEnsemble, | |
5, | |
OpSchema() | |
.SetDoc(TreeEnsemble_ver5_doc) | |
.Input(0, "X", "Input of shape [Batch Size, Number of Features]", "T") | |
.Output(0, "Y", "Output of shape [Batch Size, Number of targets]", "T") | |
.TypeConstraint( | |
"T", | |
{"tensor(float)", "tensor(double)", "tensor(float16)"}, | |
"The input type must be a tensor of a numeric type.") | |
.Attr("nodes_featureids", "Feature id for each node.", AttributeProto::INTS, true) | |
.Attr( | |
"nodes_splits", | |
"Thresholds to do the splitting on for each node with mode that is not 'BRANCH_MEMBER'.", | |
AttributeProto::TENSOR, | |
true) | |
.Attr( | |
"nodes_hitrates", | |
"Popularity of each node, used for performance and may be omitted.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr( | |
"nodes_modes", | |
"The comparison operation performed by the node. This is encoded as an enumeration of 0 ('BRANCH_LEQ'), 1 " | |
"('BRANCH_LT'), 2 ('BRANCH_GTE'), 3 ('BRANCH_GT'), 4 ('BRANCH_EQ'), 5 ('BRANCH_NEQ'), and 6 " | |
"('BRANCH_MEMBER'). Note this is a tensor of type uint8.", | |
AttributeProto::TENSOR, | |
true) | |
.Attr( | |
"nodes_truenodeids", | |
"If `nodes_trueleafs` is false at an entry, this represents the position of the true branch node. This " | |
"position can be used to index into a `nodes_*` entry. If `nodes_trueleafs` is false, it is an index into " | |
"the leaf_* attributes.", | |
AttributeProto::INTS, | |
true) | |
.Attr( | |
"nodes_falsenodeids", | |
"If `nodes_falseleafs` is false at an entry, this represents the position of the false branch node. This " | |
"position can be used to index into a `nodes_*` entry. If `nodes_falseleafs` is false, it is an index into " | |
"the leaf_* attributes.", | |
AttributeProto::INTS, | |
true) | |
.Attr( | |
"nodes_trueleafs", | |
"1 if true branch is leaf for each node and 0 an interior node. To represent a tree that is a leaf (only " | |
"has one node), one can do so by having a single `nodes_*` entry with true and false branches referencing " | |
"the same `leaf_*` entry", | |
AttributeProto::INTS, | |
true) | |
.Attr( | |
"nodes_falseleafs", | |
"1 if false branch is leaf for each node and 0 if an interior node. To represent a tree that is a leaf " | |
"(only has one node), one can do so by having a single `nodes_*` entry with true and false branches " | |
"referencing the same `leaf_*` entry", | |
AttributeProto::INTS, | |
true) | |
.Attr( | |
"nodes_missing_value_tracks_true", | |
"For each node, define whether to follow the true branch (if attribute value is 1) or false branch (if " | |
"attribute value is 0) in the presence of a NaN input feature. This attribute may be left undefined and " | |
"the default value is false (0) for all nodes.", | |
AttributeProto::INTS, | |
OPTIONAL_VALUE) | |
.Attr( | |
"tree_roots", | |
"Index into `nodes_*` for the root of each tree. The tree structure is derived from the branching of each " | |
"node.", | |
AttributeProto::INTS, | |
true) | |
.Attr( | |
"membership_values", | |
"Members to test membership of for each set membership node. List all of the members to test again in the " | |
"order that the 'BRANCH_MEMBER' mode appears in `node_modes`, delimited by `NaN`s. Will have the same " | |
"number " | |
"of sets of values as nodes with mode 'BRANCH_MEMBER'. This may be omitted if the node doesn't contain any " | |
"'BRANCH_MEMBER' nodes.", | |
AttributeProto::TENSOR, | |
OPTIONAL_VALUE) | |
.Attr( | |
"leaf_targetids", | |
"The index of the target that this leaf contributes to (this must be in range `[0, n_targets)`).", | |
AttributeProto::INTS, | |
true) | |
.Attr("leaf_weights", "The weight for each leaf.", AttributeProto::TENSOR, true) | |
.Attr("n_targets", "The total number of targets.", AttributeProto::INT, OPTIONAL_VALUE) | |
.Attr( | |
"post_transform", | |
"Indicates the transform to apply to the score. <br>One of 'NONE' (0), 'SOFTMAX' (1), 'LOGISTIC' (2), " | |
"'SOFTMAX_ZERO' (3) or 'PROBIT' (4), defaults to 'NONE' (0)", | |
AttributeProto::INT, | |
static_cast<int64_t>(0)) | |
.Attr( | |
"aggregate_function", | |
"Defines how to aggregate leaf values within a target. <br>One of 'AVERAGE' (0) 'SUM' (1) 'MIN' (2) 'MAX " | |
"(3) defaults to 'SUM' (1)", | |
AttributeProto::INT, | |
static_cast<int64_t>(1)) | |
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | |
checkInputRank(ctx, 0, 2); | |
auto* nodes_splits = ctx.getAttribute("nodes_splits"); | |
if (nullptr == nodes_splits) { | |
fail_shape_inference("Attribute 'nodes_splits' is required."); | |
} | |
if (nodes_splits->t().dims_size() != 1) { | |
fail_shape_inference("Attribute 'nodes_splits' must be 1D."); | |
} | |
auto input_type = ctx.getInputType(0)->tensor_type().elem_type(); | |
// Check that input type is same as split type | |
if (input_type != nodes_splits->t().data_type()) { | |
fail_shape_inference( | |
"Attribute 'nodes_splits' must have same type as input. Input type is ", | |
input_type, | |
" and attribute type is ", | |
nodes_splits->t().data_type()); | |
} | |
// Expected nodes_* length | |
auto expected_length = nodes_splits->t().dims(0); | |
// Validate all nodes_* attributes that are set have the same length and are 1D. | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_featureids"), expected_length, TensorProto_DataType_INT64, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_hitrates"), expected_length, TensorProto_DataType_FLOAT, false); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_modes"), expected_length, TensorProto_DataType_UINT8, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_truenodeids"), expected_length, TensorProto_DataType_INT64, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_falsenodeids"), expected_length, TensorProto_DataType_INT64, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_trueleafs"), expected_length, TensorProto_DataType_INT64, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_falseleafs"), expected_length, TensorProto_DataType_INT64, true); | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_missing_value_tracks_true"), expected_length, TensorProto_DataType_INT64, false); | |
// The set membership values and the splits must have the same type as the input. | |
auto* membership_values = ctx.getAttribute("membership_values"); | |
if (nullptr != membership_values && membership_values->t().data_type() != input_type) { | |
fail_shape_inference( | |
"Attribute 'membership_values' must have same type as input. Input type is ", | |
input_type, | |
" and attribute type is ", | |
membership_values->t().data_type()); | |
} | |
AssertAttributeProtoTypeAndLength( | |
ctx.getAttribute("nodes_splits"), expected_length, static_cast<TensorProto_DataType>(input_type), true); | |
// Validate all leaf_* attributes that are set have the same length and are 1D. | |
auto* leaf_targetids = ctx.getAttribute("leaf_targetids"); | |
auto* leaf_weights = ctx.getAttribute("leaf_weights"); | |
if (nullptr != leaf_targetids && nullptr != leaf_weights) { | |
if (leaf_targetids->ints_size() != leaf_weights->t().dims(0)) { | |
fail_shape_inference( | |
"Attribute 'leaf_targetids' must have same length as attribute 'leaf_weights'. 'leaf_targetids' " | |
"length is ", | |
leaf_targetids->ints_size(), | |
" and 'leaf_weights' length is ", | |
leaf_weights->t().dims(0)); | |
} | |
} else { | |
fail_shape_inference("Attributes 'leaf_targetids' and 'leaf_weights' must both be set."); | |
} | |
// Validate weights have same type as input. | |
if (leaf_weights->t().data_type() != input_type) { | |
fail_shape_inference( | |
"Attribute 'leaf_weights' must have same type as input. Input type is ", | |
input_type, | |
" and attribute type is ", | |
leaf_weights->t().data_type()); | |
} | |
checkInputRank(ctx, 0, 2); | |
Dim N, E; | |
unifyInputDim(ctx, 0, 0, N); | |
if (nullptr != ctx.getAttribute("n_targets")) { | |
unifyDim(E, ctx.getAttribute("n_targets")->i()); | |
} | |
updateOutputElemType(ctx, 0, input_type); | |
updateOutputShape(ctx, 0, {N, E}); | |
})); | |
// Documentation string for the ZipMap (ai.onnx.ml opset 1) operator.
static const char* ZipMap_ver1_doc = R"DOC(
Creates a map from the input and the attributes.<br>
The values are provided by the input tensor, while the keys are specified by the attributes.
Must provide keys in either classlabels_strings or classlabels_int64s (but not both).<br>
The columns of the tensor correspond one-by-one to the keys specified by the attributes. There must be as many columns as keys.<br>
)DOC";
// Schema registration for ZipMap: converts a float tensor into a sequence of
// maps keyed by one of the classlabels_* attributes. The inference function
// picks the map key type (string vs int64) from whichever attribute is set.
ONNX_ML_OPERATOR_SET_SCHEMA(
ZipMap,
1,
OpSchema()
.SetDoc(ZipMap_ver1_doc)
.Input(0, "X", "The input values", "tensor(float)")
.Output(0, "Z", "The output map", "T")
.TypeConstraint(
"T",
{"seq(map(string, float))", "seq(map(int64, float))"},
"The output will be a sequence of string or integer maps to float.")
.Attr(
"classlabels_strings",
"The keys when using string keys.<br>One and only one of the 'classlabels_*' attributes must be defined.",
AttributeProto::STRINGS,
OPTIONAL_VALUE)
.Attr(
"classlabels_int64s",
"The keys when using int keys.<br>One and only one of the 'classlabels_*' attributes must be defined.",
AttributeProto::INTS,
OPTIONAL_VALUE)
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
std::vector<std::string> classlabels_strings;
bool result = getRepeatedAttribute(ctx, "classlabels_strings", classlabels_strings);
// Output is always seq(map(K, float)); only the key type K varies below.
auto output_map_type = ctx.getOutputType(0)->mutable_sequence_type()->mutable_elem_type()->mutable_map_type();
auto output_value_tensor_type = output_map_type->mutable_value_type()->mutable_tensor_type();
output_value_tensor_type->set_elem_type(TensorProto::FLOAT);
output_value_tensor_type->mutable_shape(); // Initialize to scalar
// Reject inputs that are neither [C] nor [N,C].
// (fail_shape_inference is a throwing macro that supplies its own ';'.)
if (hasInputShape(ctx, 0) && getInputShape(ctx, 0).dim_size() != 1 && getInputShape(ctx, 0).dim_size() != 2) {
fail_shape_inference("ZipMap input shape should be 1D or 2D.")
}
if (result && !classlabels_strings.empty()) {
output_map_type->set_key_type(TensorProto::STRING);
}
std::vector<int64_t> classlabels_int64s;
result = getRepeatedAttribute(ctx, "classlabels_int64s", classlabels_int64s);
if (result && !classlabels_int64s.empty()) {
output_map_type->set_key_type(TensorProto::INT64);
}
}));
} // namespace ONNX_NAMESPACE | |