Spaces:

chendl
/

multimodal

Runtime error

multimodal / transformers /tests /models /mbart /test_modeling_tf_mbart.py

add transformers

455a40f about 2 years ago

11.2 kB

	# coding=utf-8
	# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import tempfile
	import unittest

	from transformers import AutoTokenizer, MBartConfig, is_tf_available
	from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
	from transformers.utils import cached_property

	from ...test_configuration_common import ConfigTester
	from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
	from ...test_pipeline_mixin import PipelineTesterMixin


	if is_tf_available():
	import tensorflow as tf

	from transformers import TFAutoModelForSeq2SeqLM, TFMBartForConditionalGeneration, TFMBartModel


	@require_tf
	class TFMBartModelTester:
	    """Build a small MBart configuration and matching TensorFlow inputs for tests.

	    The constructor only records the sizes and special-token ids it is given;
	    `prepare_config_and_inputs_for_common` turns them into a `config_cls`
	    instance and an inputs dict.
	    """

	    config_cls = MBartConfig
	    # Extra keyword arguments forwarded verbatim to `config_cls`.
	    config_updates = {}
	    # Not referenced by any method of this class.
	    hidden_act = "gelu"

	    def __init__(
	        self,
	        parent,
	        batch_size=13,
	        seq_length=7,
	        is_training=True,
	        use_labels=False,
	        vocab_size=99,
	        hidden_size=32,
	        num_hidden_layers=5,
	        num_attention_heads=4,
	        intermediate_size=37,
	        hidden_dropout_prob=0.1,
	        attention_probs_dropout_prob=0.1,
	        max_position_embeddings=20,
	        eos_token_id=2,
	        pad_token_id=1,
	        bos_token_id=0,
	    ):
	        """Store the test case (`parent`) and the hyper-parameters unchanged."""
	        self.parent = parent
	        self.batch_size = batch_size
	        self.seq_length = seq_length
	        self.is_training = is_training
	        self.use_labels = use_labels
	        self.vocab_size = vocab_size
	        self.hidden_size = hidden_size
	        self.num_hidden_layers = num_hidden_layers
	        self.num_attention_heads = num_attention_heads
	        self.intermediate_size = intermediate_size
	        self.hidden_dropout_prob = hidden_dropout_prob
	        self.attention_probs_dropout_prob = attention_probs_dropout_prob
	        self.max_position_embeddings = max_position_embeddings
	        self.eos_token_id = eos_token_id
	        self.pad_token_id = pad_token_id
	        self.bos_token_id = bos_token_id

	    def prepare_config_and_inputs_for_common(self):
	        """Return `(config, inputs_dict)` for the common model tests.

	        Encoder ids are `seq_length - 1` ids from `ids_tensor` followed by one
	        `eos_token_id` column; decoder ids are `seq_length` ids from
	        `ids_tensor`. The masks are filled in by `prepare_mbart_inputs_dict`.
	        """
	        input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
	        eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
	        input_ids = tf.concat([input_ids, eos_tensor], axis=1)

	        decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

	        config = self.config_cls(
	            vocab_size=self.vocab_size,
	            d_model=self.hidden_size,
	            encoder_layers=self.num_hidden_layers,
	            decoder_layers=self.num_hidden_layers,
	            encoder_attention_heads=self.num_attention_heads,
	            decoder_attention_heads=self.num_attention_heads,
	            encoder_ffn_dim=self.intermediate_size,
	            decoder_ffn_dim=self.intermediate_size,
	            dropout=self.hidden_dropout_prob,
	            attention_dropout=self.attention_probs_dropout_prob,
	            max_position_embeddings=self.max_position_embeddings,
	            # Use the same EOS id that terminates `input_ids` above instead of
	            # a hard-coded value, so config and inputs cannot disagree.
	            eos_token_id=self.eos_token_id,
	            bos_token_id=self.bos_token_id,
	            pad_token_id=self.pad_token_id,
	            decoder_start_token_id=self.pad_token_id,
	            **self.config_updates,
	        )
	        inputs_dict = prepare_mbart_inputs_dict(config, input_ids, decoder_input_ids)
	        return config, inputs_dict

	    def check_decoder_model_past_large_inputs(self, config, inputs_dict):
	        """Run one cached forward pass of the decoder on the first batch row.

	        Side effect: sets `self.batch_size` to 1. No values are compared; the
	        method only requires that the call succeeds, that the output unpacks
	        into exactly two elements, and that the second one can be indexed at 1.
	        """
	        model = TFMBartModel(config=config).get_decoder()
	        input_ids = inputs_dict["input_ids"]

	        # Keep only the first example of the batch.
	        input_ids = input_ids[:1, :]
	        attention_mask = inputs_dict["attention_mask"][:1, :]
	        head_mask = inputs_dict["head_mask"]
	        self.batch_size = 1

	        # first forward pass
	        outputs = model(input_ids, attention_mask=attention_mask, head_mask=head_mask, use_cache=True)

	        output, past_key_values = outputs.to_tuple()
	        past_key_values = past_key_values[1]

	    def test_compile_tf_model(self):
	        """Save, reload and compile a generative model inside a Keras functional model.

	        NOTE(review): this method reads `self.model_tester`,
	        `self.all_generative_model_classes` and `self._prepare_for_class`,
	        none of which are defined by this class; it looks like it was meant
	        to live on the test case class instead. Confirm before relying on it.
	        """
	        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

	        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
	        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
	        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
	        model_class = self.all_generative_model_classes[0]
	        # Symbolic Keras inputs with a fixed batch shape of (2, 2000).
	        input_ids = {
	            "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
	            "input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
	        }
	        # Prepare our model
	        model = model_class(config)
	        model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
	        # Let's load it from the disk to be sure we can use pretrained weights
	        with tempfile.TemporaryDirectory() as tmpdirname:
	            model.save_pretrained(tmpdirname)
	            model = model_class.from_pretrained(tmpdirname)
	        outputs_dict = model(input_ids)
	        hidden_states = outputs_dict[0]
	        # Add a dense layer on top to test integration with other keras modules
	        outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
	        # Compile extended model
	        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
	        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])


	def prepare_mbart_inputs_dict(
	    config,
	    input_ids,
	    decoder_input_ids,
	    attention_mask=None,
	    decoder_attention_mask=None,
	    head_mask=None,
	    decoder_head_mask=None,
	    cross_attn_head_mask=None,
	):
	    """Assemble the model inputs dict, filling in any mask that was not supplied.

	    Defaults, applied only when the corresponding argument is `None`:

	    * `attention_mask`: int8 tensor marking positions of `input_ids` that
	      differ from `config.pad_token_id`.
	    * `decoder_attention_mask`: int8 ones for the first decoder column,
	      concatenated with the non-pad mask of the remaining columns.
	    * `head_mask`: ones of shape `(encoder_layers, encoder_attention_heads)`.
	    * `decoder_head_mask` / `cross_attn_head_mask`: ones of shape
	      `(decoder_layers, decoder_attention_heads)`.

	    `config` is only read for the defaults that actually need to be built.

	    Returns:
	        A dict with the keys `input_ids`, `decoder_input_ids`,
	        `attention_mask`, `decoder_attention_mask`, `head_mask`,
	        `decoder_head_mask` and `cross_attn_head_mask`.
	    """

	    def non_pad_mask(token_ids):
	        # 1 where the token is not the padding id, 0 where it is.
	        return tf.cast(tf.math.not_equal(token_ids, config.pad_token_id), tf.int8)

	    if attention_mask is None:
	        attention_mask = non_pad_mask(input_ids)

	    if decoder_attention_mask is None:
	        # The first decoder position is always attended to, whatever its id.
	        leading_ones = tf.ones(decoder_input_ids[:, :1].shape, dtype=tf.int8)
	        trailing_mask = non_pad_mask(decoder_input_ids[:, 1:])
	        decoder_attention_mask = tf.concat([leading_ones, trailing_mask], axis=-1)

	    if head_mask is None:
	        head_mask = tf.ones((config.encoder_layers, config.encoder_attention_heads))

	    if decoder_head_mask is None:
	        decoder_head_mask = tf.ones((config.decoder_layers, config.decoder_attention_heads))

	    if cross_attn_head_mask is None:
	        cross_attn_head_mask = tf.ones((config.decoder_layers, config.decoder_attention_heads))

	    return dict(
	        input_ids=input_ids,
	        decoder_input_ids=decoder_input_ids,
	        attention_mask=attention_mask,
	        decoder_attention_mask=decoder_attention_mask,
	        head_mask=head_mask,
	        decoder_head_mask=decoder_head_mask,
	        cross_attn_head_mask=cross_attn_head_mask,
	    )


	@require_tf
	class TFMBartModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
	    """Common-model and pipeline tests for the TensorFlow MBart classes.

	    The class-level tuples and mapping are empty when TensorFlow is not
	    available, so the module can still be imported without it.
	    """

	    all_model_classes = (TFMBartForConditionalGeneration, TFMBartModel) if is_tf_available() else ()
	    all_generative_model_classes = (TFMBartForConditionalGeneration,) if is_tf_available() else ()
	    pipeline_model_mapping = (
	        {
	            "conversational": TFMBartForConditionalGeneration,
	            "feature-extraction": TFMBartModel,
	            "summarization": TFMBartForConditionalGeneration,
	            "text2text-generation": TFMBartForConditionalGeneration,
	            "translation": TFMBartForConditionalGeneration,
	        }
	        if is_tf_available()
	        else {}
	    )
	    is_encoder_decoder = True
	    test_pruning = False
	    test_onnx = False

	    # TODO: Fix the failed tests
	    def is_pipeline_test_to_skip(
	        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
	    ):
	        """Return True for every pipeline test case except feature extraction.

	        Only `pipeline_test_casse_name` is inspected; the other arguments are
	        accepted but unused.
	        """
	        # Exception encountered when calling layer '...'
	        return pipeline_test_casse_name != "FeatureExtractionPipelineTests"

	    def setUp(self):
	        """Create a fresh model tester and config tester for each test."""
	        self.model_tester = TFMBartModelTester(self)
	        self.config_tester = ConfigTester(self, config_class=MBartConfig)

	    def test_config(self):
	        """Run the shared configuration checks for `MBartConfig`."""
	        self.config_tester.run_common_tests()

	    def test_decoder_model_past_large_inputs(self):
	        """Delegate to the model tester's cached-decoder forward-pass check."""
	        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
	        self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)

	    def test_model_common_attributes(self):
	        """Check embedding and bias accessors for every model class.

	        Generative classes must expose a Keras layer as output embeddings and
	        a dict of `tf.Variable` biases; the other classes must return `None`
	        for both. Uses unittest assertion methods rather than bare `assert`
	        so the checks survive `python -O` and report the offending value.
	        """
	        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

	        for model_class in self.all_model_classes:
	            model = model_class(config)
	            self.assertIsInstance(model.get_input_embeddings(), tf.keras.layers.Layer)

	            if model_class in self.all_generative_model_classes:
	                self.assertIsInstance(model.get_output_embeddings(), tf.keras.layers.Layer)
	                bias = model.get_bias()
	                self.assertIsInstance(bias, dict)
	                for bias_value in bias.values():
	                    self.assertIsInstance(bias_value, tf.Variable)
	            else:
	                self.assertIsNone(model.get_output_embeddings())
	                self.assertIsNone(model.get_bias())

	    @tooslow
	    def test_saved_model_creation(self):
	        # Intentionally empty; presumably replaces a slower inherited test
	        # of the same name - confirm against the mixin.
	        pass


	@require_sentencepiece
	@require_tokenizers
	@require_tf
	class TFMBartModelIntegrationTest(unittest.TestCase):
	    """Translate a fixed English sentence with the `model_name` checkpoint
	    and compare the decoded output with the expected Romanian text."""

	    src_text = [
	        " UN Chief Says There Is No Military Solution in Syria",
	    ]
	    expected_text = [
	        "Şeful ONU declară că nu există o soluţie militară în Siria",
	    ]
	    model_name = "facebook/mbart-large-en-ro"

	    @cached_property
	    def tokenizer(self):
	        """Tokenizer loaded from `model_name` (cached after first access)."""
	        return AutoTokenizer.from_pretrained(self.model_name)

	    @cached_property
	    def model(self):
	        """Seq2seq TF model loaded from `model_name` (cached after first access)."""
	        return TFAutoModelForSeq2SeqLM.from_pretrained(self.model_name)

	    def _assert_generated_batch_equal_expected(self, **tokenizer_kwargs):
	        """Assert that translating `src_text` yields exactly `expected_text`."""
	        translations = self.translate_src_text(**tokenizer_kwargs)
	        self.assertListEqual(self.expected_text, translations)

	    def translate_src_text(self, **tokenizer_kwargs):
	        """Tokenize `src_text`, generate with two beams, and decode to strings.

	        `tokenizer_kwargs` are forwarded to the tokenizer call; special
	        tokens are dropped when decoding.
	        """
	        encoded = self.tokenizer(self.src_text, **tokenizer_kwargs, return_tensors="tf")
	        output_ids = self.model.generate(
	            encoded.input_ids,
	            attention_mask=encoded.attention_mask,
	            num_beams=2,
	        )
	        return self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)

	    @slow
	    def test_batch_generation_en_ro(self):
	        """Run the English-to-Romanian translation check with default tokenizer options."""
	        self._assert_generated_batch_equal_expected()