Spaces:

awsaf49
/

sonics-fake-song-detection

Running

App Files Files Community

sonics-fake-song-detection / sonics /layers /transformer.py

awsaf49

Initial Commit

3f50570 10 months ago

raw

history blame contribute delete

5.1 kB

	import torch.nn as nn
	from typing import Optional

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import torch.utils.checkpoint
	from torch.jit import Final

	from timm.layers import (
	Mlp,
	DropPath,
	use_fused_attn,
	)


	class Attention(nn.Module):
	    """Multi-head self-attention over a ``(B, N, C)`` input.

	    A single linear layer produces queries, keys and values, which are split
	    into ``num_heads`` heads of size ``dim // num_heads``. Depending on what
	    ``use_fused_attn()`` reports at construction time, the attention itself is
	    computed either with ``F.scaled_dot_product_attention`` or with an
	    explicit scaled matmul + softmax.

	    Args:
	        dim: Channel size of the input; must be divisible by ``num_heads``.
	        num_heads: Number of attention heads.
	        qkv_bias: Whether the joint q/k/v projection has a bias term.
	        qk_norm: If true, ``norm_layer(head_dim)`` is applied to q and k.
	        attn_drop: Dropout probability for the attention weights.
	        proj_drop: Dropout probability applied after the output projection.
	        norm_layer: Factory used to build the q/k norms when ``qk_norm`` is set.
	    """

	    fused_attn: Final[bool]

	    def __init__(
	        self,
	        dim: int,
	        num_heads: int = 8,
	        qkv_bias: bool = False,
	        qk_norm: bool = False,
	        attn_drop: float = 0.0,
	        proj_drop: float = 0.0,
	        norm_layer: nn.Module = nn.LayerNorm,
	    ) -> None:
	        super().__init__()
	        assert dim % num_heads == 0, "dim should be divisible by num_heads"
	        per_head = dim // num_heads
	        self.num_heads = num_heads
	        self.head_dim = per_head
	        self.scale = per_head**-0.5
	        self.fused_attn = use_fused_attn()

	        # Submodules are created in a fixed order so parameter registration
	        # (and therefore state_dict layout / RNG consumption) stays stable.
	        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
	        if qk_norm:
	            self.q_norm = norm_layer(per_head)
	            self.k_norm = norm_layer(per_head)
	        else:
	            self.q_norm = nn.Identity()
	            self.k_norm = nn.Identity()
	        self.attn_drop = nn.Dropout(attn_drop)
	        self.proj = nn.Linear(dim, dim)
	        self.proj_drop = nn.Dropout(proj_drop)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Apply self-attention to ``x`` and return a tensor of the same shape."""
	        batch, tokens, channels = x.shape

	        # (B, N, 3*C) -> (3, B, heads, N, head_dim), then split into q, k, v.
	        packed = self.qkv(x).reshape(
	            batch, tokens, 3, self.num_heads, self.head_dim
	        )
	        q, k, v = packed.permute(2, 0, 3, 1, 4).unbind(0)
	        q = self.q_norm(q)
	        k = self.k_norm(k)

	        if not self.fused_attn:
	            q = q * self.scale
	            weights = (q @ k.transpose(-2, -1)).softmax(dim=-1)
	            weights = self.attn_drop(weights)
	            out = weights @ v
	        else:
	            # The fused kernel applies dropout itself, so it must be told
	            # explicitly to disable it outside of training.
	            out = F.scaled_dot_product_attention(
	                q,
	                k,
	                v,
	                dropout_p=self.attn_drop.p if self.training else 0.0,
	            )

	        # Merge heads back: (B, heads, N, head_dim) -> (B, N, C).
	        out = out.transpose(1, 2).reshape(batch, tokens, channels)
	        return self.proj_drop(self.proj(out))


	class LayerScale(nn.Module):
	    """Multiply the input by a learnable vector ``gamma`` of length ``dim``.

	    Args:
	        dim: Length of the learnable scale vector.
	        init_values: Initial value of every entry of ``gamma``.
	        inplace: If true, the input tensor is scaled in place (``mul_``)
	            and returned; otherwise a new tensor is returned.
	    """

	    def __init__(
	        self,
	        dim: int,
	        init_values: float = 1e-5,
	        inplace: bool = False,
	    ) -> None:
	        super().__init__()
	        self.inplace = inplace
	        initial_scale = init_values * torch.ones(dim)
	        self.gamma = nn.Parameter(initial_scale)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Return ``x`` scaled by ``gamma``."""
	        if self.inplace:
	            return x.mul_(self.gamma)
	        return x * self.gamma


	class TransformerBlock(nn.Module):
	    """Pre-norm transformer block: attention and MLP, each in a residual branch.

	    Each branch is ``x + drop_path(layer_scale(f(norm(x))))`` where ``f`` is
	    the ``Attention`` module for the first branch and ``mlp_layer`` for the
	    second.

	    Args:
	        dim: Channel size of the input.
	        num_heads: Number of attention heads.
	        mlp_ratio: MLP hidden size is ``int(dim * mlp_ratio)``.
	        qkv_bias: Forwarded to ``Attention``.
	        qk_norm: Forwarded to ``Attention``.
	        proj_drop: Dropout for the attention projection and for the MLP.
	        attn_drop: Dropout on the attention weights.
	        init_values: Initial ``LayerScale`` value; a falsy value (``None`` or
	            ``0``) replaces ``LayerScale`` with ``nn.Identity``.
	        drop_path: ``DropPath`` rate; values ``<= 0`` use ``nn.Identity``.
	        act_layer: Activation factory passed to the MLP.
	        norm_layer: Factory for both block norms and the attention q/k norms.
	        mlp_layer: Factory for the MLP sub-layer.
	    """

	    def __init__(
	        self,
	        dim: int,
	        num_heads: int,
	        mlp_ratio: float = 4.0,
	        qkv_bias: bool = False,
	        qk_norm: bool = False,
	        proj_drop: float = 0.0,
	        attn_drop: float = 0.0,
	        init_values: Optional[float] = None,
	        drop_path: float = 0.0,
	        act_layer: nn.Module = nn.GELU,
	        norm_layer: nn.Module = nn.LayerNorm,
	        mlp_layer: nn.Module = Mlp,
	    ) -> None:
	        super().__init__()
	        scale_enabled = bool(init_values)
	        stochastic_depth = drop_path > 0.0

	        # --- attention branch ---
	        self.norm1 = norm_layer(dim)
	        self.attn = Attention(
	            dim,
	            num_heads=num_heads,
	            qkv_bias=qkv_bias,
	            qk_norm=qk_norm,
	            attn_drop=attn_drop,
	            proj_drop=proj_drop,
	            norm_layer=norm_layer,
	        )
	        if scale_enabled:
	            self.ls1 = LayerScale(dim, init_values=init_values)
	        else:
	            self.ls1 = nn.Identity()
	        if stochastic_depth:
	            self.drop_path1 = DropPath(drop_path)
	        else:
	            self.drop_path1 = nn.Identity()

	        # --- feed-forward branch ---
	        self.norm2 = norm_layer(dim)
	        self.mlp = mlp_layer(
	            in_features=dim,
	            hidden_features=int(dim * mlp_ratio),
	            act_layer=act_layer,
	            drop=proj_drop,
	        )
	        if scale_enabled:
	            self.ls2 = LayerScale(dim, init_values=init_values)
	        else:
	            self.ls2 = nn.Identity()
	        if stochastic_depth:
	            self.drop_path2 = DropPath(drop_path)
	        else:
	            self.drop_path2 = nn.Identity()

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Run the attention branch, then the MLP branch, both residual."""
	        attn_branch = self.attn(self.norm1(x))
	        x = x + self.drop_path1(self.ls1(attn_branch))
	        mlp_branch = self.mlp(self.norm2(x))
	        return x + self.drop_path2(self.ls2(mlp_branch))


	class Transformer(nn.Module):
	    """
	    Stack of ``num_layers`` identically configured ``TransformerBlock``
	    modules applied one after another (design taken from the timm library).

	    Args:
	        embed_dim: Channel size passed to every block as ``dim``.
	        num_heads: Number of attention heads per block.
	        num_layers: How many blocks to stack.
	        mlp_ratio, qkv_bias, qk_norm, proj_drop, attn_drop, drop_path:
	            Forwarded unchanged to every ``TransformerBlock``.
	    """

	    def __init__(
	        self,
	        embed_dim: int,
	        num_heads: int,
	        num_layers: int,
	        mlp_ratio: float = 4.0,
	        qkv_bias: bool = False,
	        qk_norm: bool = False,
	        proj_drop: float = 0.0,
	        attn_drop: float = 0.0,
	        drop_path: float = 0.0,
	    ):
	        super().__init__()
	        # Every layer shares the same configuration, so collect it once.
	        block_config = dict(
	            dim=embed_dim,
	            num_heads=num_heads,
	            mlp_ratio=mlp_ratio,
	            qkv_bias=qkv_bias,
	            qk_norm=qk_norm,
	            proj_drop=proj_drop,
	            attn_drop=attn_drop,
	            drop_path=drop_path,
	        )
	        layers = [TransformerBlock(**block_config) for _ in range(num_layers)]
	        self.blocks = nn.ModuleList(layers)

	    def forward(self, x):
	        """Feed ``x`` through every block in order and return the result."""
	        for layer in self.blocks:
	            x = layer(x)
	        return x