import os
import sys

import torch.nn as nn
import torch.nn.functional as F

sys.path.append(os.getcwd())

from main.library.predictors.FCPE.attentions import SelfAttention
from main.library.predictors.FCPE.utils import calc_same_padding, Transpose, GLU, Swish

class ConformerConvModule_LEGACY(nn.Module):
    def __init__(self, dim, causal=False, expansion_factor=2, kernel_size=31, dropout=0.0):
        super().__init__()
        inner_dim = dim * expansion_factor
        # Pointwise conv + GLU, depthwise conv (same or causal padding), Swish, pointwise projection back to dim.
        padding = calc_same_padding(kernel_size) if not causal else (kernel_size - 1, 0)
        self.net = nn.Sequential(
            nn.LayerNorm(dim), Transpose((1, 2)), nn.Conv1d(dim, inner_dim * 2, 1), GLU(dim=1),
            DepthWiseConv1d_LEGACY(inner_dim, inner_dim, kernel_size=kernel_size, padding=padding),
            Swish(), nn.Conv1d(inner_dim, dim, 1), Transpose((1, 2)), nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class ConformerConvModule(nn.Module):
    def __init__(self, dim, expansion_factor=2, kernel_size=31, dropout=0.0):
        super().__init__()
        inner_dim = dim * expansion_factor
        # Same structure as the legacy module, but built from stock nn.GLU / nn.SiLU and a padded depthwise conv.
        self.net = nn.Sequential(
            nn.LayerNorm(dim), Transpose((1, 2)), nn.Conv1d(dim, inner_dim * 2, 1), nn.GLU(dim=1),
            DepthWiseConv1d(inner_dim, inner_dim, kernel_size=kernel_size, padding=calc_same_padding(kernel_size)[0], groups=inner_dim),
            nn.SiLU(), nn.Conv1d(inner_dim, dim, 1), Transpose((1, 2)), nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class DepthWiseConv1d_LEGACY(nn.Module):
    def __init__(self, chan_in, chan_out, kernel_size, padding):
        super().__init__()
        self.padding = padding  # (left, right) tuple applied with F.pad before the depthwise conv
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size, groups=chan_in)

    def forward(self, x):
        return self.conv(F.pad(x, self.padding))

class DepthWiseConv1d(nn.Module):
    def __init__(self, chan_in, chan_out, kernel_size, padding, groups):
        super().__init__()
        self.conv = nn.Conv1d(chan_in, chan_out, kernel_size=kernel_size, padding=padding, groups=groups)

    def forward(self, x):
        return self.conv(x)

class EncoderLayer(nn.Module):
    def __init__(self, parent):
        super().__init__()
        self.conformer = ConformerConvModule_LEGACY(parent.dim_model)
        self.norm = nn.LayerNorm(parent.dim_model)
        self.dropout = nn.Dropout(parent.residual_dropout)
        self.attn = SelfAttention(dim=parent.dim_model, heads=parent.num_heads, causal=False)

    def forward(self, phone, mask=None):
        # Pre-norm self-attention followed by the conformer conv block, each with a residual connection.
        phone = phone + self.attn(self.norm(phone), mask=mask)
        return phone + self.conformer(phone)

class ConformerNaiveEncoder(nn.Module):
    def __init__(self, num_layers, num_heads, dim_model, use_norm=False, conv_only=False, conv_dropout=0, atten_dropout=0):
        super().__init__()
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dim_model = dim_model
        self.use_norm = use_norm
        self.residual_dropout = 0.1
        self.attention_dropout = 0.1
        self.encoder_layers = nn.ModuleList([CFNEncoderLayer(dim_model, num_heads, use_norm, conv_only, conv_dropout, atten_dropout) for _ in range(num_layers)])

    def forward(self, x, mask=None):
        # Apply each encoder layer in sequence.
        for layer in self.encoder_layers:
            x = layer(x, mask)
        return x

class CFNEncoderLayer(nn.Module):
    def __init__(self, dim_model, num_heads=8, use_norm=False, conv_only=False, conv_dropout=0, atten_dropout=0):
        super().__init__()
        self.conformer = nn.Sequential(ConformerConvModule(dim_model), nn.Dropout(conv_dropout)) if conv_dropout > 0 else ConformerConvModule(dim_model)
        self.norm = nn.LayerNorm(dim_model)
        self.dropout = nn.Dropout(0.1)
        self.attn = SelfAttention(dim=dim_model, heads=num_heads, causal=False, use_norm=use_norm, dropout=atten_dropout) if not conv_only else None

    def forward(self, x, mask=None):
        # Optional pre-norm self-attention (skipped when conv_only), then the conformer conv block, each residual.
        if self.attn is not None:
            x = x + self.attn(self.norm(x), mask=mask)
        return x + self.conformer(x)
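

# Minimal usage sketch (not part of the original module): it assumes SelfAttention accepts
# (batch, frames, dim_model) input and returns a tensor of the same shape, as the layers above
# imply. The hyperparameters and shapes below are illustrative only.
if __name__ == "__main__":
    import torch

    # dim_model should be divisible by num_heads for typical multi-head attention implementations.
    encoder = ConformerNaiveEncoder(num_layers=3, num_heads=8, dim_model=256)
    x = torch.randn(2, 100, 256)  # (batch, frames, dim_model)
    out = encoder(x)              # mask is optional and defaults to None
    print(out.shape)              # expected: torch.Size([2, 100, 256])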