Spaces:
Paused
Paused
File size: 12,692 Bytes
0fdb130 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
import copy
import warnings
from typing import Callable, Optional, Union
import numpy as np
import onnx
import torch
from sentence_transformers import SentenceTransformer, models
from sklearn.linear_model import LogisticRegression
from transformers.modeling_utils import PreTrainedModel
from setfit.exporters.utils import mean_pooling
class OnnxSetFitModel(torch.nn.Module):
    """Chain a SetFit body, a pooler and an optional head into a single exportable `nn.Module`.

    `torch.onnx.export` works with a `nn.Module`, so this wrapper glues the individual parts together.
    With only `model_body` and `pooler` set, the module maps inputs to embeddings; with `model_head` set
    as well, it maps inputs to the output of the head.

    Attributes:
        model_body (`PreTrainedModel`): The pretrained model body of a SetFit model.
        pooler (`Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]`, *optional*, defaults to `None`):
            The callable used to pool the body output into embeddings. In `forward` it is called with a
            dict holding the keys `"token_embeddings"` and `"attention_mask"`. When `None`, `mean_pooling`
            is used.
        model_head (`Union[nn.Module, LogisticRegression]`, *optional*, defaults to `None`): The model head
            from the pretrained SetFit model. If `None`, `forward` returns the embeddings instead of
            predictions.
    """

    def __init__(
        self,
        model_body: PreTrainedModel,
        pooler: Optional[Union[torch.nn.Module, Callable[[torch.Tensor], torch.Tensor]]] = None,
        model_head: Optional[Union[torch.nn.Module, LogisticRegression]] = None,
    ):
        super().__init__()
        self.model_body = model_body

        # Fall back to mean pooling (and say so) when the caller did not supply a pooler.
        if pooler is not None:
            self.pooler = pooler
        else:
            print("No pooler was set so defaulting to mean pooling.")
            self.pooler = mean_pooling

        self.model_head = model_head

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
        """Run body and pooler, then the head when one is set.

        Returns the pooled embeddings when `model_head` is `None`, otherwise the output of
        `model_head(embeddings)`.
        """
        body_output = self.model_body(input_ids, attention_mask, token_type_ids)

        # The pooler receives the first element of the body output together with the attention mask.
        features = {"token_embeddings": body_output[0], "attention_mask": attention_mask}
        embeddings = self.pooler(features)

        # With a head attached the model is fully torch based and produces the final predictions;
        # without one (sklearn head case) only the embeddings are returned.
        if self.model_head is not None:
            return self.model_head(embeddings)
        return embeddings
def export_onnx_setfit_model(setfit_model: OnnxSetFitModel, inputs, output_path, opset: int = 12):
    """Export the `OnnxSetFitModel`.

    This exports the model created by the `OnnxSetFitModel` wrapper using `torch.onnx.export`.

    Args:
        setfit_model (`OnnxSetFitModel`): The `OnnxSetFitModel` we want to export to .onnx format.
        inputs (`Dict[str, torch.Tensor]`): The inputs we would hypothetically pass to the model. These are
            generated using a tokenizer and must contain the keys `input_ids`, `attention_mask` and
            `token_type_ids`.
        output_path (`str`): The local path to save the onnx model to.
        opset (`int`): The ONNX opset to use for the export. Defaults to 12.

    Raises:
        KeyError: If `inputs` lacks one of the three required keys.
    """
    # Graph input names, listed in the positional order of `OnnxSetFitModel.forward`.
    input_names = ["input_ids", "attention_mask", "token_type_ids"]
    output_names = ["logits"]

    # Setup the dynamic axes for onnx conversion: inputs vary in batch and sequence length,
    # the output only in batch size.
    dynamic_axes = {name: {0: "batch_size", 1: "sequence"} for name in input_names}
    dynamic_axes.update({name: {0: "batch_size"} for name in output_names})

    # Move inputs to the right device. The tensors are selected by name rather than by the dict's
    # iteration order, because `args` is bound positionally to `forward` and a tokenizer is free to
    # emit its keys in a different order (e.g. `token_type_ids` before `attention_mask`).
    target = setfit_model.model_body.device
    args = tuple(inputs[name].to(target) for name in input_names)

    setfit_model.eval()
    with torch.no_grad():
        torch.onnx.export(
            setfit_model,
            args=args,
            f=output_path,
            opset_version=opset,
            input_names=input_names,
            output_names=output_names,
            dynamic_axes=dynamic_axes,
        )
def export_sklearn_head_to_onnx(model_head: LogisticRegression, opset: int) -> onnx.onnx_ml_pb2.ModelProto:
    """Convert the Scikit-Learn head from a SetFitModel to ONNX format.

    Args:
        model_head (`LogisticRegression`): The trained SetFit model_head. It must expose `coef_`, or be a
            meta-estimator whose `estimators_` all expose `coef_`.
        opset (`int`): The ONNX opset to use for optimizing this model. The opset is not
            guaranteed and will default to the maximum version possible for the sklearn
            model.

    Returns:
        [`onnx.onnx_ml_pb2.ModelProto`] The ONNX model generated from the sklearn head.

    Raises:
        ImportError: If `skl2onnx` is not installed an error will be raised asking
            to install this package.
        ValueError: If no `coef_` attribute can be found on the head or on all of its estimators.
    """
    # The conversion tooling is an optional dependency, so it is imported lazily.
    try:
        import onnxconverter_common
        from skl2onnx import convert_sklearn
        from skl2onnx.common.data_types import guess_data_type
        from skl2onnx.sklapi import CastTransformer
        from sklearn.pipeline import Pipeline
    except ImportError:
        raise ImportError(
            "\n`skl2onnx` must be installed in order to convert a model with an sklearn head.\n"
            "Please install with `pip install skl2onnx`.\n"
        )

    feature_shape = (None, model_head.n_features_in_)

    # Find the coefficient array whose dtype decides the ONNX input type.
    if hasattr(model_head, "coef_"):
        reference_coef = model_head.coef_
    elif hasattr(model_head, "estimators_"):
        if not all(hasattr(estimator, "coef_") for estimator in model_head.estimators_):
            raise ValueError(
                "The model_head is a meta-estimator but not all of the estimators have a coef_ attribute."
            )
        reference_coef = model_head.estimators_[0].coef_
    else:
        raise ValueError(
            "The model_head either does not have a coef_ attribute or some estimators in model_head.estimators_ do not have a coef_ attribute. Conversion to ONNX only supports these cases."
        )

    dtype = guess_data_type(reference_coef, shape=feature_shape)[0][1]
    dtype.shape = feature_shape

    # A double-precision head needs its inputs cast to double first, for compatibility inside of ONNX
    # with the outputs coming from the setfit model.
    needs_double_cast = isinstance(dtype, onnxconverter_common.data_types.DoubleTensorType)
    sklearn_model = (
        Pipeline([("castdouble", CastTransformer(dtype=np.double)), ("head", model_head)])
        if needs_double_cast
        else model_head
    )

    # Convert sklearn head into ONNX format.
    return convert_sklearn(
        sklearn_model,
        initial_types=[("model_head", dtype)],
        target_opset=opset,
        options={id(sklearn_model): {"zipmap": False}},
    )
def hummingbird_export(model, data_sample):
    """Convert a model to ONNX with Hummingbird-ML.

    Args:
        model: The model to convert; passed unchanged to `hummingbird.ml.convert`.
        data_sample: Sample input passed to `hummingbird.ml.convert` as its third argument.

    Returns:
        The `_model` attribute of the object returned by `hummingbird.ml.convert`
        (presumably the underlying ONNX model; confirm against the Hummingbird container API).

    Raises:
        ImportError: If `hummingbird-ml` is not installed.
    """
    try:
        from hummingbird.ml import convert
    except ImportError as err:
        # The two sentences used to be glued together without a space ("installed.Run ...").
        raise ImportError(
            "Hummingbird-ML library is not installed. Run 'pip install hummingbird-ml' to use this type of export."
        ) from err

    onnx_model = convert(model, "onnx", data_sample)
    return onnx_model._model
def export_onnx(
    model_body: SentenceTransformer,
    model_head: Union[torch.nn.Module, LogisticRegression],
    opset: int,
    output_path: str = "model.onnx",
    ignore_ir_version: bool = True,
    use_hummingbird: bool = False,
) -> None:
    """Export a PyTorch backed SetFit model to ONNX Intermediate Representation.

    With a `models.Dense` head, body and head are exported together as one graph and the tokenizer
    settings used for the dummy input are stored in the model's metadata. With any other head, the head
    is converted on its own, the body is exported separately, and the two ONNX models are merged.

    Args:
        model_body (`SentenceTransformer`): The model_body from a SetFit model body. This should be a
            SentenceTransformer.
        model_head (`torch.nn.Module` or `LogisticRegression`): The SetFit model head. This can be either a
            dense layer SetFitHead or a Sklearn estimator.
        opset (`int`): The actual version of the ONNX operator set to use. The final opset used might be lower.
            ONNX will use the highest version supported by both the sklearn head and the model body. If versions
            can't be rectified an error will be thrown.
        output_path (`str`): The path where the generated ONNX model will be stored. At a minimum it needs to
            contain the name of the final file.
        ignore_ir_version (`bool`): Whether to ignore the IR version used in sklearn. The version is often
            mismatched with the transformer models. Setting this to true coerces the versions to be the same.
            This might cause errors but in practice works. If this is set to False you need to ensure that the
            IR versions align between the transformer and the sklearn onnx representation.
        use_hummingbird (`bool`): Only used for non-Dense heads. If True, the head is converted with
            Hummingbird-ML (`hummingbird_export`) instead of `export_sklearn_head_to_onnx`.

    Raises:
        ValueError: If `ignore_ir_version` is False and the IR versions of head and body differ.
    """
    # Load the model and get all of the parts.
    model_body_module = model_body._modules["0"]
    model_pooler = model_body._modules["1"]
    tokenizer = model_body_module.tokenizer
    max_length = model_body_module.max_seq_length
    transformer = model_body_module.auto_model
    transformer.eval()

    # Create dummy data to use during onnx export.
    tokenizer_kwargs = dict(
        max_length=max_length,
        padding="max_length",
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors="pt",
    )
    dummy_sample = "It's a test."
    dummy_inputs = tokenizer(dummy_sample, **tokenizer_kwargs)

    # Check to see if the model uses a sklearn head or a torch dense layer.
    if isinstance(model_head, models.Dense):
        setfit_model = OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"], model_head).cpu()
        export_onnx_setfit_model(setfit_model, dummy_inputs, output_path, opset)

        # Store meta data of the tokenizer for getting the correct tokenizer during inference.
        # Exactly one metadata entry is created per setting; no empty placeholder entry is added.
        onnx_setfit_model = onnx.load(output_path)
        for key, value in tokenizer_kwargs.items():
            meta = onnx_setfit_model.metadata_props.add()
            meta.key = str(key)
            meta.value = str(value)
    else:
        # Export the sklearn head first to get the minimum opset. sklearn is behind
        # in supported opsets.
        # Hummingbird-ML can be used as an option to export to standard opset.
        if use_hummingbird:
            with torch.no_grad():
                # Run the body on a copy so `dummy_inputs` stays untouched for the body export below.
                test_input = copy.deepcopy(dummy_inputs)
                head_input = model_body(test_input)["sentence_embedding"]
                onnx_head = hummingbird_export(model_head, head_input.detach().numpy())
        else:
            onnx_head = export_sklearn_head_to_onnx(model_head, opset)

        # The body is exported with the highest opset version declared by the converted head.
        max_opset = max(x.version for x in onnx_head.opset_import)

        if max_opset != opset:
            warnings.warn(
                f"sklearn onnx max opset is {max_opset} requested opset {opset} using opset {max_opset} for compatibility."
            )
        export_onnx_setfit_model(
            OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"]),
            dummy_inputs,
            output_path,
            max_opset,
        )

        onnx_body = onnx.load(output_path)

        # Check that the ir_versions are aligned and if not align them.
        if ignore_ir_version:
            onnx_head.ir_version = onnx_body.ir_version
        elif onnx_head.ir_version != onnx_body.ir_version:
            raise ValueError(
                f"\nIR Version mismatch between head={onnx_head.ir_version} and body={onnx_body.ir_version}\n"
                "Make sure that the ONNX IR versions are aligned and supported between the chosen Sklearn model\n"
                "and the transformer. You can set ignore_ir_version=True to coerce them but this might cause errors.\n"
            )

        # Combine the onnx body and head by mapping the pooled output to the input of the sklearn model.
        head_input_name = next(iter(onnx_head.graph.input)).name
        onnx_setfit_model = onnx.compose.merge_models(
            onnx_body,
            onnx_head,
            io_map=[("logits", head_input_name)],
        )

    # Save the final model.
    onnx.save(onnx_setfit_model, output_path)
|