Upload custom kernels
build/torch-universal/liger_kernels/__init__.py
CHANGED
@@ -6,7 +6,7 @@ from .group_norm import LigerGroupNormFunction
 from .kl_div import LigerKLDivLossFunction
 from .layer_norm import LigerLayerNormFunction
 from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
-from .rms_norm import LigerRMSNormFunction
+from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
 from .jsd import LigerJSDFunction
 from .rope import LigerRopeFunction
 from .swiglu import LigerSiLUMulFunction
@@ -22,6 +22,7 @@ __all__ = [
     "LigerLayerNormFunction",
     "LigerQwen2VLMRopeFunction",
     "LigerRMSNormFunction",
+    "LigerRMSNorm",
     "LigerJSDFunction",
     "LigerRopeFunction",
     "LigerSiLUMulFunction",
build/torch-universal/liger_kernels/_ops.py
CHANGED
@@ -1,8 +1,8 @@
 import torch
-ops = torch.ops.
+ops = torch.ops._liger_kernels_20250507090511

 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"
+    return f"_liger_kernels_20250507090511::{op_name}"
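The regenerated namespace hash pins the Python wrappers to the matching native extension registered under torch.ops. A minimal sketch of what the helper produces; "rms_norm_forward" is a hypothetical op name used only for illustration:

# Illustrative only: "rms_norm_forward" is a made-up op name, not necessarily
# a real op registered by this build.
qualified_name = add_op_namespace_prefix("rms_norm_forward")
# -> "_liger_kernels_20250507090511::rms_norm_forward"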
build/torch-universal/liger_kernels/rms_norm.py
CHANGED
@@ -362,4 +362,44 @@ class LigerRMSNormFunction(torch.autograd.Function):
             ctx.num_warps,
             ctx.in_place,
         )
-        return dX, dW, None, None, None, None
+        return dX, dW, None, None, None, None
+
+
+class LigerRMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+    def __init__(self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", in_place=True):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
+        self.variance_epsilon = eps
+        self.offset = offset
+        self.casting_mode = casting_mode
+        self.in_place = in_place
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return LigerRMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+            self.offset,
+            self.casting_mode,
+            self.in_place
+        )
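For context, a minimal usage sketch of the new module. It assumes the package is importable as liger_kernels and that a CUDA device is available, since LigerRMSNormFunction dispatches to Triton kernels:

import torch
from liger_kernels import LigerRMSNorm  # assumed import path

# Smoke test: normalize a batch of hidden states on the GPU.
norm = LigerRMSNorm(hidden_size=4096).to("cuda")
x = torch.randn(2, 128, 4096, device="cuda")
y = norm(x)
assert y.shape == x.shape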
torch-ext/liger_kernels/__init__.py
CHANGED
@@ -6,7 +6,7 @@ from .group_norm import LigerGroupNormFunction
 from .kl_div import LigerKLDivLossFunction
 from .layer_norm import LigerLayerNormFunction
 from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
-from .rms_norm import LigerRMSNormFunction
+from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
 from .jsd import LigerJSDFunction
 from .rope import LigerRopeFunction
 from .swiglu import LigerSiLUMulFunction
@@ -22,6 +22,7 @@ __all__ = [
     "LigerLayerNormFunction",
     "LigerQwen2VLMRopeFunction",
     "LigerRMSNormFunction",
+    "LigerRMSNorm",
     "LigerJSDFunction",
     "LigerRopeFunction",
     "LigerSiLUMulFunction",
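The torch-ext/ tree carries the same export change as the generated build/ tree above. Once the repo is on the Hub, the module can presumably also be loaded through the kernels library; a hedged sketch, with "user/liger_kernels" standing in as a placeholder repo id:

# Hedged sketch: "user/liger_kernels" is a placeholder, not the actual repo id.
from kernels import get_kernel

liger = get_kernel("user/liger_kernels")
norm = liger.LigerRMSNorm(hidden_size=4096)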
torch-ext/liger_kernels/rms_norm.py
CHANGED
@@ -362,4 +362,44 @@ class LigerRMSNormFunction(torch.autograd.Function):
             ctx.num_warps,
             ctx.in_place,
         )
-        return dX, dW, None, None, None, None
+        return dX, dW, None, None, None, None
+
+
+class LigerRMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+
+    def __init__(self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", in_place=True):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
+        self.variance_epsilon = eps
+        self.offset = offset
+        self.casting_mode = casting_mode
+        self.in_place = in_place
+
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return LigerRMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+            self.offset,
+            self.casting_mode,
+            self.in_place
+        )
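One way to sanity-check the new module is against a plain eager RMSNorm written from the standard formula. The reference below is not from this repo; it assumes the default llama casting mode, the default offset of 0.0, and an available CUDA device:

import torch
from liger_kernels import LigerRMSNorm  # assumed import path

def eager_rms_norm(x, weight, eps=1e-6, offset=0.0):
    # Reference RMSNorm: x * rsqrt(mean(x^2) + eps), scaled by (offset + weight).
    variance = x.float().pow(2).mean(-1, keepdim=True)
    x_normed = x.float() * torch.rsqrt(variance + eps)
    return ((offset + weight.float()) * x_normed).to(x.dtype)

norm = LigerRMSNorm(hidden_size=1024).to("cuda")
x = torch.randn(4, 64, 1024, device="cuda")
torch.testing.assert_close(
    norm(x),
    eager_rms_norm(x, norm.weight, norm.variance_epsilon, norm.offset),
    rtol=1e-4,
    atol=1e-4,
)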