# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

from transformers import is_tf_available
from transformers.testing_utils import require_tf


if is_tf_available():
    import tensorflow as tf
    from tensorflow.python.eager import context
    from tensorflow.python.framework import ops

    from transformers import GradientAccumulator, create_optimizer


# Skip the whole class when TensorFlow is not installed.
@require_tf
class OptimizationFTest(unittest.TestCase):
    def assertListAlmostEqual(self, list1, list2, tol):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def testGradientAccumulator(self):
        accumulator = GradientAccumulator()
        accumulator([tf.constant([1.0, 2.0])])
        accumulator([tf.constant([-2.0, 1.0])])
        accumulator([tf.constant([-1.0, 2.0])])
        # Passing a different number of gradients than on the first call is an error.
        with self.assertRaises(ValueError):
            accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])])
        self.assertEqual(accumulator.step, 3)
        self.assertEqual(len(accumulator.gradients), 1)
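        # The accumulated gradient is the element-wise sum of the three calls:
        # [1.0 - 2.0 - 1.0, 2.0 + 1.0 + 2.0] == [-2.0, 5.0]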
        self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [-2.0, 5.0], tol=1e-2)
        # reset() zeroes both the accumulated gradients and the step counter.
        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [0.0, 0.0], tol=1e-2)

    def testGradientAccumulatorDistributionStrategy(self):
        # Reset the eager context through private TF APIs so that the logical
        # device configuration below can still be changed in this process.
        context._context = None
        ops.enable_eager_execution_internal()

        physical_devices = tf.config.list_physical_devices("CPU")
        if len(physical_devices) == 1:
            # Split the single CPU into two logical devices so that
            # MirroredStrategy has two replicas to mirror across.
            tf.config.set_logical_device_configuration(
                physical_devices[0], [tf.config.LogicalDeviceConfiguration(), tf.config.LogicalDeviceConfiguration()]
            )
        devices = tf.config.list_logical_devices(device_type="CPU")
        strategy = tf.distribute.MirroredStrategy(devices=devices[:2])

        with strategy.scope():
            accumulator = GradientAccumulator()
            variable = tf.Variable([4.0, 3.0])
            optimizer, _ = create_optimizer(5e-5, 10, 5)
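            # create_optimizer returns (optimizer, lr_schedule); the positional
            # arguments here are a peak learning rate of 5e-5, 10 total
            # training steps, and 5 warmup steps.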
            # Per-replica staging variable used to feed explicit gradient
            # values into the accumulator from inside strategy.run.
            gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False)

        def accumulate_on_replica(gradient):
            accumulator([gradient])

        def apply_on_replica():
            optimizer.apply_gradients(list(zip(accumulator.gradients, [variable])))

        @tf.function
        def accumulate(grad1, grad2):
            with strategy.scope():
                local_variables = strategy.experimental_local_results(gradient_placeholder)
                local_variables[0].assign(grad1)
                local_variables[1].assign(grad2)
                strategy.run(accumulate_on_replica, args=(gradient_placeholder,))

        @tf.function
        def apply_grad():
            with strategy.scope():
                strategy.run(apply_on_replica)

        def _check_local_values(grad1, grad2):
            values = strategy.experimental_local_results(accumulator._gradients[0])
            self.assertListAlmostEqual(values[0].value(), grad1, tol=1e-2)
            self.assertListAlmostEqual(values[1].value(), grad2, tol=1e-2)
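
        # Each accumulate() call feeds one gradient to each of the two
        # replicas; the accumulator keeps an independent running sum per replica.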
        accumulate([1.0, 2.0], [-1.0, 1.0])
        accumulate([3.0, -1.0], [-1.0, -1.0])
        accumulate([-2.0, 2.0], [3.0, -2.0])
        self.assertEqual(accumulator.step, 3)
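        # Per-replica sums: replica 0 gets [1+3-2, 2-1+2] == [2, 3] and
        # replica 1 gets [-1-1+3, 1-1-2] == [1, -2].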
        _check_local_values([2.0, 3.0], [1.0, -2.0])

        apply_grad()
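        # With a peak learning rate of 5e-5 the single optimizer step barely
        # moves the variable, so it still matches its initial value within tol.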
        self.assertListAlmostEqual(variable.value(), [4.0, 3.0], tol=1e-2)

        accumulator.reset()
        self.assertEqual(accumulator.step, 0)
        _check_local_values([0.0, 0.0], [0.0, 0.0])