# Copyright (c) ONNX Project Contributors
#
# SPDX-License-Identifier: Apache-2.0

import numpy as np

import onnx
from onnx.backend.test.case.base import Base
from onnx.backend.test.case.node import expect
from onnx.defs import AI_ONNX_PREVIEW_TRAINING_DOMAIN


def apply_adam(
    r, t, x, g, v, h, norm_coefficient, norm_coefficient_post, alpha, beta, epsilon
):  # type: ignore
    # Add gradient of regularization term.
    g_regularized = norm_coefficient * x + g
    # Update momentum.
    v_new = alpha * v + (1 - alpha) * g_regularized
    # Update second-order momentum.
    h_new = beta * h + (1 - beta) * (g_regularized * g_regularized)
    # Compute element-wise square root (epsilon for numerical stability).
    h_sqrt = np.sqrt(h_new) + epsilon
    # Adjust learning rate.
    if t > 0:
        # Apply bias correction to the momentum estimates.
        r_adjusted = r * np.sqrt(1 - beta**t) / (1 - alpha**t)
    else:
        # No bias correction.
        r_adjusted = r
    # Apply Adam update rule.
    x_new = x - r_adjusted * (v_new / h_sqrt)
    # Optionally apply post-update regularization (controlled by norm_coefficient_post).
    x_final = (1 - norm_coefficient_post) * x_new
    return x_final, v_new, h_new
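

# A minimal sanity-check sketch, not part of the generated test cases: it
# re-derives a single Adam step by hand (zero regularization, t == 0, so no
# bias correction) and compares against apply_adam. The concrete numbers
# below are illustrative assumptions, not values taken from the tests below.
def _check_apply_adam_by_hand() -> None:
    r = np.array(0.1, dtype=np.float32)
    t = np.array(0, dtype=np.int64)
    x = np.array([1.0], dtype=np.float32)
    g = np.array([-2.0], dtype=np.float32)
    v = np.zeros_like(x)
    h = np.zeros_like(x)
    alpha, beta, epsilon = 0.9, 0.999, 1e-7
    x_new, v_new, h_new = apply_adam(r, t, x, g, v, h, 0.0, 0.0, alpha, beta, epsilon)
    # With norm_coefficient == 0 the regularized gradient is just g, so
    # v_new = (1 - alpha) * g and h_new = (1 - beta) * g * g; with t == 0 there
    # is no bias correction, so x_new = x - r * v_new / (sqrt(h_new) + epsilon).
    expected_v = (1 - alpha) * g
    expected_h = (1 - beta) * g * g
    expected_x = x - r * expected_v / (np.sqrt(expected_h) + epsilon)
    assert np.allclose(v_new, expected_v)
    assert np.allclose(h_new, expected_h)
    assert np.allclose(x_new, expected_x)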


class Adam(Base):
    @staticmethod
    def export_adam() -> None:
        # Define operator attributes.
        norm_coefficient = 0.001
        alpha = 0.95
        beta = 0.1
        epsilon = 1e-7

        # Create operator.
        node = onnx.helper.make_node(
            "Adam",
            inputs=["R", "T", "X", "G", "V", "H"],
            outputs=["X_new", "V_new", "H_new"],
            norm_coefficient=norm_coefficient,
            alpha=alpha,
            beta=beta,
            epsilon=epsilon,
            domain=AI_ONNX_PREVIEW_TRAINING_DOMAIN,
        )

        # Define operator inputs.
        r = np.array(0.1, dtype=np.float32)  # scalar
        t = np.array(0, dtype=np.int64)  # scalar
        x = np.array([1.2, 2.8], dtype=np.float32)
        g = np.array([-0.94, -2.5], dtype=np.float32)
        v = np.array([1.7, 3.6], dtype=np.float32)
        h = np.array([0.1, 0.1], dtype=np.float32)

        # Compute expected outputs of Adam.
        x_new, v_new, h_new = apply_adam(
            r, t, x, g, v, h, norm_coefficient, 0.0, alpha, beta, epsilon
        )

        # Check results.
        expect(
            node,
            inputs=[r, t, x, g, v, h],
            outputs=[x_new, v_new, h_new],
            name="test_adam",
            opset_imports=[
                onnx.helper.make_opsetid(AI_ONNX_PREVIEW_TRAINING_DOMAIN, 1)
            ],
        )
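
    # The next case exercises a single Adam node that updates two tensors at
    # once: after the shared R and T inputs, the remaining inputs are grouped
    # as (X1, X2), (G1, G2), (V1, V2), (H1, H2), and the reference update is
    # applied to each tensor independently.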

    @staticmethod
    def export_adam_multiple() -> None:
        # Define operator attributes.
        norm_coefficient = 0.001
        alpha = 0.95
        beta = 0.85
        epsilon = 1e-2

        # Create operator. Epsilon is passed explicitly so the node's attribute
        # matches the value used to compute the expected outputs below.
        node = onnx.helper.make_node(
            "Adam",
            inputs=["R", "T", "X1", "X2", "G1", "G2", "V1", "V2", "H1", "H2"],
            outputs=["X1_new", "X2_new", "V1_new", "V2_new", "H1_new", "H2_new"],
            norm_coefficient=norm_coefficient,
            alpha=alpha,
            beta=beta,
            epsilon=epsilon,
            domain=AI_ONNX_PREVIEW_TRAINING_DOMAIN,
        )

        # Define operator inputs.
        r = np.array(0.1, dtype=np.float32)  # scalar
        t = np.array(0, dtype=np.int64)  # scalar

        x1 = np.array([1.0], dtype=np.float32)
        g1 = np.array([-1.0], dtype=np.float32)
        v1 = np.array([2.0], dtype=np.float32)
        h1 = np.array([0.5], dtype=np.float32)

        x2 = np.array([1.0, 2.0], dtype=np.float32)
        g2 = np.array([-1.0, -3.0], dtype=np.float32)
        v2 = np.array([4.0, 1.0], dtype=np.float32)
        h2 = np.array([1.0, 10.0], dtype=np.float32)

        # Compute expected outputs of Adam.
        x1_new, v1_new, h1_new = apply_adam(
            r, t, x1, g1, v1, h1, norm_coefficient, 0.0, alpha, beta, epsilon
        )
        x2_new, v2_new, h2_new = apply_adam(
            r, t, x2, g2, v2, h2, norm_coefficient, 0.0, alpha, beta, epsilon
        )

        # Check results.
        expect(
            node,
            inputs=[r, t, x1, x2, g1, g2, v1, v2, h1, h2],
            outputs=[x1_new, x2_new, v1_new, v2_new, h1_new, h2_new],
            name="test_adam_multiple",
            opset_imports=[
                onnx.helper.make_opsetid(AI_ONNX_PREVIEW_TRAINING_DOMAIN, 1)
            ],
        )
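

# A minimal sketch, illustrative only and not a generated test case: both
# examples above use T == 0, so the bias-correction branch of apply_adam never
# runs. This loop shows it for T > 0, where the learning rate is rescaled by
# sqrt(1 - beta**t) / (1 - alpha**t) because the momentum estimates start at
# zero. All concrete values here are assumptions.
def _demo_bias_correction() -> None:
    r = np.array(0.1, dtype=np.float32)
    x = np.array([1.0, -1.0], dtype=np.float32)
    g = np.array([0.5, 0.5], dtype=np.float32)
    v = np.zeros_like(x)
    h = np.zeros_like(x)
    alpha, beta, epsilon = 0.9, 0.999, 1e-7
    for t in range(1, 4):
        x, v, h = apply_adam(r, t, x, g, v, h, 0.0, 0.0, alpha, beta, epsilon)
        effective_r = r * np.sqrt(1 - beta**t) / (1 - alpha**t)
        print(f"step {t}: effective learning rate {effective_r:.4f}, x = {x}")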