Spaces:

CS5647Team3
/

Mandarin_Tone_Evaluation

Running

Mandarin_Tone_Evaluation / model_cnn.py

Upload 4 files

2be48c4 about 1 year ago

1.82 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	import random
	import numpy as np
	from scipy.fftpack import fft
	import wave


	class Model(nn.Module):
	def __init__(self, input_dim=1, hidden_dim = 256, tone_class=5, syllable_class=1000):

	super().__init__()
	self.input_dim = input_dim
	self.tone_class = tone_class
	self.syllable_class = syllable_class
	# hidden_size = 128*hidden_dim//16
	conv_layers = []
	in_channels = input_dim # Input channels for the first layer
	channel_list = [16,16,'p2',32,32,'p2',64,64,'p1',64]
	# channel_list = [32,'p','p',128]
	# channel_list = [32,32,64,64,128]

	for out_channels in channel_list:
	if out_channels=='p2':
	conv_layers.append(nn.MaxPool2d(kernel_size=2))
	continue
	elif out_channels=='p1':
	conv_layers.append(nn.MaxPool2d(kernel_size=1))
	continue
	conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
	conv_layers.append(nn.BatchNorm2d(out_channels))
	conv_layers.append(nn.ReLU(inplace=True))
	conv_layers.append(nn.Dropout(0.1))
	in_channels = out_channels

	self.conv = nn.Sequential(*conv_layers)



	self.output = nn.Sequential(
	nn.Linear(4096, 128),
	nn.ReLU(),
	nn.Dropout(0.1),

	nn.Linear(128,syllable_class)
	)

	def forward(self, x):

	x = self.conv(x) #[batch_size,channel,length(input_length//4),hidden_dim]
	x = x.permute((0,2,1,3))#[batch_size,length,channel,hidden_dim]
	x = x.reshape(x.shape[0],x.shape[1],-1)

	return self.output(x)