# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import Any, Mapping, Optional
from transformers import PretrainedConfig, PreTrainedTokenizer, TensorType
from .utils import ParameterFormat, compute_effective_axis_dimension, compute_serialized_parameters_size
DEFAULT_ONNX_OPSET = 11
# 2 GiB
EXTERNAL_DATA_FORMAT_SIZE_LIMIT = 2 * 1024 * 1024 * 1024
class OnnxConfig(ABC):
"""
Base class for ONNX exportable model describing metadata on how to export the model through the ONNX format.
"""
DEFAULT_FIXED_BATCH = 2
DEFAULT_FIXED_SEQUENCE = 8
def __init__(self, config: PretrainedConfig):
self._config = config
@classmethod
def default(cls, config: PretrainedConfig) -> "OnnxConfig":
"""
        Instantiate an OnnxConfig for a specific model
Args:
config: The model's configuration to use when exporting to ONNX
Returns:
OnnxConfig for this model
"""
return cls(config)
@property
@abstractmethod
def inputs(self) -> Mapping[str, Mapping[int, str]]:
"""
Mapping containing the axis definition of the input tensors to provide to the model
Returns:
            For each input: its name associated with a mapping from axis position to the symbolic axis name
"""
raise NotImplementedError()
@property
@abstractmethod
def outputs(self) -> Mapping[str, Mapping[int, str]]:
"""
        Mapping containing the axis definition of the output tensors returned by the model
        Returns:
            For each output: its name associated with a mapping from axis position to the symbolic axis name
"""
raise NotImplementedError()
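    # Hedged illustration (not part of this class): a concrete subclass for a
    # BERT-like encoder might implement the two properties as
    #     inputs  -> OrderedDict([("input_ids", {0: "batch", 1: "sequence"}),
    #                             ("attention_mask", {0: "batch", 1: "sequence"})])
    #     outputs -> OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"})])
    # i.e. each tensor name maps axis positions to their symbolic names.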
@property
def values_override(self) -> Optional[Mapping[str, Any]]:
"""
Dictionary of keys to override in the model's config before exporting
Returns:
Dictionary with the keys (and their corresponding values) to override
"""
if hasattr(self._config, "use_cache"):
return {"use_cache": False}
return None
@property
def default_batch_size(self) -> int:
"""
        The default batch size to use if no other indication is given
Returns:
Integer > 0
"""
        # Using a batch of 2 avoids ONNX assuming a single-sample batch and optimizing for it
return OnnxConfig.DEFAULT_FIXED_BATCH
@property
def default_sequence_length(self) -> int:
"""
        The default sequence length to use if no other indication is given
Returns:
Integer > 0
"""
return OnnxConfig.DEFAULT_FIXED_SEQUENCE
@property
def default_onnx_opset(self) -> int:
"""
        Which ONNX opset to use when exporting the model
Returns:
Integer ONNX Opset version
"""
return DEFAULT_ONNX_OPSET
@staticmethod
def use_external_data_format(num_parameters: int) -> bool:
"""
        Flag indicating whether the model requires the external data format
        Args:
            num_parameters: Number of parameters in the model
        Returns:
            True if num_parameters * size_of(float32) >= 2 GiB, False otherwise
"""
return (
compute_serialized_parameters_size(num_parameters, ParameterFormat.Float)
>= EXTERNAL_DATA_FORMAT_SIZE_LIMIT
)
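    # Worked example (illustrative): with float32 parameters, a 600M-parameter
    # model serializes to roughly 600_000_000 * 4 = 2.4e9 bytes, which exceeds
    # the 2 GiB limit, so use_external_data_format(600_000_000) is True, while
    # a 110M-parameter model (~440 MB) yields False.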
def generate_dummy_inputs(
self,
tokenizer: PreTrainedTokenizer,
batch_size: int = -1,
seq_length: int = -1,
is_pair: bool = False,
framework: Optional[TensorType] = None,
) -> Mapping[str, Any]:
"""
Generate inputs to provide to the ONNX exporter for the specific framework
Args:
tokenizer: The tokenizer associated with this model configuration
batch_size: The batch size (int) to export the model for (-1 means dynamic axis)
seq_length: The sequence length (int) to export the model for (-1 means dynamic axis)
is_pair: Indicate if the input is a pair (sentence 1, sentence 2)
            framework: The framework (optional) the tokenizer will generate tensors for
Returns:
Mapping[str, Tensor] holding the kwargs to provide to the model's forward function
"""
        # If the batch axis is dynamic (-1), export with a fixed batch of 2 samples to avoid ONNX optimizing for a single sample
batch_size = compute_effective_axis_dimension(
batch_size, fixed_dimension=OnnxConfig.DEFAULT_FIXED_BATCH, num_token_to_add=0
)
        # If the sequence axis is dynamic (-1), export with a fixed sequence of 8 tokens to avoid ONNX-specific optimizations
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
seq_length = compute_effective_axis_dimension(
seq_length, fixed_dimension=OnnxConfig.DEFAULT_FIXED_SEQUENCE, num_token_to_add=token_to_add
)
        # Generate dummy inputs according to the computed batch and sequence: each
        # sample is seq_length space-separated unk tokens
        dummy_input = [" ".join([tokenizer.unk_token] * seq_length)] * batch_size
return dict(tokenizer(dummy_input, return_tensors=framework))
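# Hedged usage sketch (illustrative, not part of this module): how a minimal
# concrete subclass would drive `generate_dummy_inputs`. The subclass and the
# checkpoint name are assumptions for this example only.
#
#     from transformers import AutoConfig, AutoTokenizer
#
#     class SketchOnnxConfig(OnnxConfig):
#         @property
#         def inputs(self):
#             return OrderedDict([("input_ids", {0: "batch", 1: "sequence"}),
#                                 ("attention_mask", {0: "batch", 1: "sequence"})])
#
#         @property
#         def outputs(self):
#             return OrderedDict([("last_hidden_state", {0: "batch", 1: "sequence"})])
#
#     config = AutoConfig.from_pretrained("bert-base-uncased")
#     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
#     onnx_config = SketchOnnxConfig.default(config)
#     dummy = onnx_config.generate_dummy_inputs(tokenizer, framework=TensorType.PYTORCH)
#     # dummy["input_ids"].shape -> (2, 8): fixed batch of 2 and fixed sequence
#     # of 8, with special tokens accounted for via num_special_tokens_to_add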
class OnnxConfigWithPast(OnnxConfig, ABC):
def __init__(self, config: PretrainedConfig, use_past: bool = False):
super().__init__(config)
self.use_past = use_past
@classmethod
def with_past(cls, config: PretrainedConfig) -> "OnnxConfigWithPast":
"""
        Instantiate an OnnxConfigWithPast with the `use_past` attribute set to True
        Args:
            config: The underlying model's config to use when exporting to ONNX
        Returns:
            An OnnxConfigWithPast with `.use_past = True`
"""
return cls(config, use_past=True)
@property
def values_override(self) -> Optional[Mapping[str, Any]]:
if hasattr(self._config, "use_cache"):
return {"use_cache": self.use_past}
return None
def generate_dummy_inputs(
self,
tokenizer: PreTrainedTokenizer,
batch_size: int = -1,
seq_length: int = -1,
is_pair: bool = False,
framework: Optional[TensorType] = None,
) -> Mapping[str, Any]:
        # If the batch axis is dynamic (-1), export with a fixed batch of 2 samples to avoid ONNX optimizing for a single sample
batch_size = compute_effective_axis_dimension(
batch_size, fixed_dimension=self.default_batch_size, num_token_to_add=0
)
        # If the sequence axis is dynamic (-1), export with a fixed sequence of 8 tokens to avoid ONNX-specific optimizations
token_to_add = tokenizer.num_special_tokens_to_add(is_pair)
        # When use_past is enabled, the caching mechanism expects the new input to be a single token
fixed_sequence_length = 1 if self.use_past else self.default_sequence_length
seq_length = compute_effective_axis_dimension(
seq_length, fixed_dimension=fixed_sequence_length, num_token_to_add=token_to_add
)
        # Generate dummy inputs according to the computed batch and sequence: each
        # sample is seq_length space-separated unk tokens
        dummy_input = [" ".join([tokenizer.unk_token] * seq_length)] * batch_size
        return OrderedDict(tokenizer(dummy_input, return_tensors=framework))
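if __name__ == "__main__":
    # Hedged demonstration (assumes the `gpt2` checkpoint is reachable): shows
    # how `with_past` flips `use_past`, overrides `use_cache`, and shrinks the
    # dummy sequence to a single token. The subclass below is a sketch, not a
    # real exporter config.
    from transformers import AutoConfig, AutoTokenizer

    class _SketchConfigWithPast(OnnxConfigWithPast):
        @property
        def inputs(self) -> Mapping[str, Mapping[int, str]]:
            return OrderedDict([("input_ids", {0: "batch", 1: "sequence"})])

        @property
        def outputs(self) -> Mapping[str, Mapping[int, str]]:
            return OrderedDict([("logits", {0: "batch", 1: "sequence"})])

    config = AutoConfig.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    onnx_config = _SketchConfigWithPast.with_past(config)
    print(onnx_config.use_past)         # True
    print(onnx_config.values_override)  # {'use_cache': True}
    dummy = onnx_config.generate_dummy_inputs(tokenizer)
    print({name: len(batch[0]) for name, batch in dummy.items()})  # 1 token per sample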