SchulzR97
/

TUC-AR-C3D

Video Classification

English

robotics

Model card Files Files and versions Community

Robert Schulz committed on Jan 31

Commit

b5a4cca

1 Parent(s): 2511aa0

commit files to HF hub

Browse files

Files changed (3) hide show

model.py +0 -731
tuc-ar.pth +2 -2
ucf101.pth +0 -3

model.py DELETED Viewed

@@ -1,731 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torchvision.models import resnet50
-class Conv2DBlock(nn.Module):
-    """2D stage: convolution, max pooling, channel dropout, then LeakyReLU.
-
-    Args:
-        in_channels: Channel count of the incoming feature map.
-        out_channels: Channel count produced by the convolution.
-        kernel_size_conv: Kernel size of the convolution.
-        kernel_size_pool: Window size of the max pooling.
-        stride: Stride of the max pooling (the convolution keeps its default stride).
-        padding_conv: Padding applied by the convolution.
-        p_dropout: Drop probability handed to ``nn.Dropout2d``.
-    """
-    def __init__(
-            self,
-            in_channels:int,
-            out_channels:int,
-            kernel_size_conv:tuple[int, int],
-            kernel_size_pool:tuple[int, int],
-            stride:tuple[int, int],
-            padding_conv:int = 0,
-            p_dropout:float = 0.5
-    ):
-        super().__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            kernel_size=kernel_size_conv,
-            padding=padding_conv,
-        )
-        self.pool = nn.MaxPool2d(kernel_size=kernel_size_pool, stride=stride)
-        self.dropout = nn.Dropout2d(p_dropout)
-        self.relu = nn.LeakyReLU()
-    def forward(self, X):
-        """Apply conv, pool, dropout and activation to ``X``, in that order."""
-        pooled = self.pool(self.conv(X))
-        return self.relu(self.dropout(pooled))
-class Conv3DBlock(nn.Module):
-    """3D stage: convolution, max pooling, batch norm, channel dropout, LeakyReLU.
-
-    Args:
-        in_channels: Channel count of the incoming volume.
-        out_channels: Channel count produced by the convolution (also the batch-norm width).
-        kernel_size_conv: Kernel size of the convolution.
-        kernel_size_pool: Window size of the max pooling.
-        stride: Stride of the max pooling (the convolution keeps its default stride).
-        padding_conv: Padding applied by the convolution.
-        p_dropout: Drop probability handed to ``nn.Dropout3d``.
-    """
-    def __init__(
-            self,
-            in_channels:int,
-            out_channels:int,
-            kernel_size_conv:tuple[int, int, int],
-            kernel_size_pool:tuple[int, int, int],
-            stride:tuple[int, int, int],
-            padding_conv:int = 0,
-            p_dropout:float = 0.5
-    ):
-        super().__init__()
-        self.conv = nn.Conv3d(
-            in_channels,
-            out_channels,
-            kernel_size=kernel_size_conv,
-            padding=padding_conv,
-        )
-        self.pool = nn.MaxPool3d(kernel_size=kernel_size_pool, stride=stride)
-        self.dropout = nn.Dropout3d(p_dropout)
-        self.batchnorm = nn.BatchNorm3d(out_channels)
-        self.relu = nn.LeakyReLU()
-    def forward(self, X):
-        """Apply conv, pool, batch norm, dropout and activation to ``X``, in that order."""
-        normed = self.batchnorm(self.pool(self.conv(X)))
-        return self.relu(self.dropout(normed))
-class SelfAttention(nn.Module):
-    """Single-head scaled dot-product self-attention with learned Q/K/V projections.
-
-    Args:
-        d_q: Width of the query projection.
-        d_k: Width of the key projection; must equal ``d_q`` because the scores are ``Q @ K^T``.
-        d_v: Width of the value projection, i.e. the size of each output vector.
-        embed_dim: Size of the last axis of the input.
-
-    Raises:
-        ValueError: If ``d_q`` and ``d_k`` differ.
-    """
-    def __init__(
-            self,
-            d_q:int = 2,
-            d_k:int = 2,
-            d_v:int = 4,
-            embed_dim:int = 3
-        ):
-        super().__init__()
-        if d_q != d_k:
-            # Fail here instead of with an opaque matmul shape error inside forward().
-            raise ValueError(f'd_q ({d_q}) and d_k ({d_k}) must be equal to compute Q @ K^T')
-        self.d_q = d_q
-        self.d_k = d_k
-        self.d_v = d_v
-        # Projection matrices, drawn uniformly from [0, 1) in Q, K, V order.
-        self.W_q = nn.Parameter(torch.rand(embed_dim, d_q))
-        self.W_k = nn.Parameter(torch.rand(embed_dim, d_k))
-        self.W_v = nn.Parameter(torch.rand(embed_dim, d_v))
-    def forward(self, X):
-        """Compute attention-weighted context vectors for every sequence in the batch.
-
-        Args:
-            X: Tensor of shape (batch, sequence, embed_dim). An empty batch is allowed.
-
-        Returns:
-            Tensor of shape (batch, sequence, d_v).
-        """
-        # Batched matmuls replace the former per-sample Python loop; the math is the same.
-        Q = X @ self.W_q    # Queries
-        K = X @ self.W_k    # Keys
-        V = X @ self.W_v    # Values
-        omega = Q @ K.transpose(-2, -1)                     # omega ...unnormalized attention weights
-        # NOTE(review): the weights are normalized over the query axis (dim=-2, which was dim=0
-        # of each per-sample score matrix), not over the key axis as in the usual formulation.
-        # Kept unchanged because switching the axis would alter the outputs of trained weights.
-        alpha = F.softmax(omega / self.d_k**0.5, dim=-2)    # alpha ...normalized attention weights
-        return alpha @ V                                    # context vectors
-class MultiHeadSelfAttention(nn.Module):
-    """Several independent ``SelfAttention`` heads whose outputs are concatenated.
-
-    Args:
-        num_heads: Number of heads to create.
-        d_q: Query width passed to every head.
-        d_k: Key width passed to every head.
-        d_v: Value width passed to every head.
-        embed_dim: Input embedding size passed to every head.
-    """
-    def __init__(
-            self,
-            num_heads:int,
-            d_q:int = 2,
-            d_k:int = 2,
-            d_v:int = 4,
-            embed_dim:int = 3
-        ):
-        super().__init__()
-        self.d_q, self.d_k, self.d_v = d_q, d_k, d_v
-        self.heads = nn.ModuleList(
-            SelfAttention(d_q, d_k, d_v, embed_dim) for _ in range(num_heads)
-        )
-    def forward(self, X):
-        """Run every head on ``X`` and join the results along the last axis."""
-        per_head = [attend(X) for attend in self.heads]
-        return torch.cat(per_head, dim=-1)
-class model001(nn.Module):
-    """Three Conv3d/MaxPool3d/BatchNorm3d stages followed by a linear readout and softmax.
-
-    ``sequence_length`` is used as the input channel count of the first
-    convolution, so axis 1 of the input must have that size. The readout is
-    hard-wired to 4608 flattened features, which fixes the remaining input
-    extent.
-
-    Args:
-        sequence_length: Input channel count of ``conv1``.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        # Stage 1
-        self.conv1 = nn.Conv3d(sequence_length, 64, kernel_size=(2, 7, 7))
-        self.maxPool1 = nn.MaxPool3d(kernel_size=(1, 7, 7), stride=(1, 5, 5))
-        self.batchnorm1 = nn.BatchNorm3d(64)
-        # Stage 2
-        self.conv2 = nn.Conv3d(64, 96, kernel_size=(2, 5, 5))
-        self.maxPool2 = nn.MaxPool3d(kernel_size=(1, 5, 5), stride=(1, 3, 3))
-        self.batchnorm2 = nn.BatchNorm3d(96)
-        # Stage 3
-        self.conv3 = nn.Conv3d(96, 128, kernel_size=(2, 5, 5))
-        self.maxPool3 = nn.MaxPool3d(kernel_size=(1, 5, 5), stride=(1, 3, 3))
-        self.batchnorm3 = nn.BatchNorm3d(128)
-        # Head
-        self.flatten = nn.Flatten()
-        self.readout = nn.Linear(4608, num_actions)
-        self.dropout1d = nn.Dropout1d(p = 0.2)
-        self.dropout3d = nn.Dropout3d(p = 0.2)
-        self.relu = nn.ReLU()
-        self.softmax = nn.Softmax(dim = 1)
-        self.sigmoid = nn.Sigmoid()    # not used by forward()
-        self.num_actions = num_actions
-    def forward(self, X):
-        """Return softmax scores of shape (batch, num_actions) for ``X``."""
-        stages = (
-            (self.conv1, self.maxPool1, self.batchnorm1),
-            (self.conv2, self.maxPool2, self.batchnorm2),
-            (self.conv3, self.maxPool3, self.batchnorm3),
-        )
-        Y = X
-        for conv, pool, norm in stages:
-            Y = self.relu(self.dropout3d(norm(pool(conv(Y)))))
-        logits = self.readout(self.flatten(Y))
-        # NOTE(review): Dropout1d is applied to the readout output right before the softmax;
-        # confirm that dropping class scores during training is intended.
-        return self.softmax(self.dropout1d(logits))
-class model002(nn.Module):
-    """Four ``Conv3DBlock`` stages, dropout, a linear readout and softmax.
-
-    ``forward`` accepts only inputs of shape
-    (batch, sequence_length, 4, 400, 400); the sequence axis is fed to the
-    first block as its channel axis.
-
-    Args:
-        sequence_length: Expected size of input axis 1 and channel count of ``conv1``.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        self.sequence_length = sequence_length
-        self.input_size = (400, 400)
-        self.conv1 = Conv3DBlock(sequence_length, 64, kernel_size_conv=(2, 7, 7), kernel_size_pool=(1, 7, 7), stride=(1, 5, 5))
-        self.conv2 = Conv3DBlock(64, 96, kernel_size_conv=(2, 5, 5), kernel_size_pool=(1, 5, 5), stride=(1, 3, 3))
-        self.conv3 = Conv3DBlock(96, 128, kernel_size_conv=(2, 5, 5), kernel_size_pool=(1, 5, 5), stride=(1, 3, 3))
-        self.conv4 = Conv3DBlock(128, 160, kernel_size_conv=(1, 3, 3), kernel_size_pool=(1, 3, 3), stride=(1, 2, 2))
-        self.flatten = nn.Flatten(start_dim=1)
-        self.dropout = nn.Dropout()
-        self.readout = nn.Linear(160, num_actions)
-        self.softmax = nn.Softmax(dim=1)
-        self.num_actions = num_actions
-    def forward(self, X):
-        """Return softmax scores of shape (batch, num_actions).
-
-        Raises:
-            AssertionError: If ``X`` does not have the expected shape.
-        """
-        dims = X.shape
-        shape_ok = dims[1] == self.sequence_length and dims[2] == 4 and dims[3] == self.input_size[0] and dims[4] == self.input_size[1]
-        assert shape_ok,\
-            f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=4, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-        Y = X
-        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
-            Y = stage(Y)
-        Y = self.dropout(self.flatten(Y))
-        return self.softmax(self.readout(Y))
-class model003(nn.Module):
-    """Per-frame ResNet-50 embeddings, multi-head self-attention, linear readout, softmax.
-
-    The backbone is only ever run under ``torch.no_grad()``, so it acts as a
-    fixed feature extractor. It is therefore pinned to eval mode: in train
-    mode its batch-norm layers would normalise with batch statistics and keep
-    updating their running statistics even though no gradients flow.
-
-    Args:
-        sequence_length: Number of frames per sample; sizes the readout layer.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        self.embed = resnet50(weights='DEFAULT')
-        self.embed.eval()    # frozen backbone, see class docstring
-        self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=1000)
-        self.flatten = nn.Flatten(start_dim=1)
-        readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-        self.readout = nn.Linear(readout_dim1, num_actions)
-        self.softmax = nn.Softmax(dim=1)
-        self.num_actions = num_actions
-    def train(self, mode:bool = True):
-        """Set train/eval mode on the trainable layers while keeping the backbone in eval.
-
-        Args:
-            mode: ``True`` for training mode, ``False`` for evaluation mode.
-
-        Returns:
-            This module, as ``nn.Module.train`` does.
-        """
-        super().train(mode)
-        self.embed.eval()
-        return self
-    def forward(self, X):
-        """Return softmax scores of shape (batch, num_actions).
-
-        Args:
-            X: Batch of frame sequences; each element of the batch is passed to the
-                backbone as one image batch.
-        """
-        # One sample (= one stack of frames) at a time, as before, to bound peak memory.
-        with torch.no_grad():
-            embeddings = torch.stack([self.embed(x) for x in X])
-        Y = self.attention(embeddings)
-        Y = self.flatten(Y)
-        Y = self.readout(Y)
-        Y = self.softmax(Y)
-        return Y
-class model004(nn.Module):
-    """Per-frame 2D CNN (three ``Conv2DBlock`` stages), self-attention over frames, readout.
-
-    The attention layer is built for 960 features per frame, so the flattened
-    output of ``conv3`` must have that size.
-
-    Args:
-        sequence_length: Number of frames per sample; sizes the readout layer.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        # Fixed: a trailing comma used to turn this attribute into a 1-tuple.
-        self.sequence_length = sequence_length
-        self.num_actions = num_actions
-        # Not used by forward(); kept so the module's parameters/state-dict keys stay the same.
-        self.embed = nn.Embedding(sequence_length, 256)
-        self.conv1 = Conv2DBlock(
-            in_channels = 3,
-            out_channels = 16,
-            kernel_size_conv = (9, 9),
-            kernel_size_pool = (7, 7),
-            stride = (5, 5),
-            padding_conv=1,
-            p_dropout = 0
-        )
-        self.conv2 = Conv2DBlock(
-            in_channels = 16,
-            out_channels = 32,
-            kernel_size_conv = (7, 7),
-            kernel_size_pool = (5, 5),
-            stride = (3, 3),
-            p_dropout = 0
-        )
-        self.conv3 = Conv2DBlock(
-            in_channels = 32,
-            out_channels = 64,
-            kernel_size_conv = (5, 5),
-            kernel_size_pool = (3, 3),
-            stride = (2, 2),
-            p_dropout = 0
-        )
-        self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=960)
-        self.flatten = nn.Flatten(start_dim=1)
-        readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-        self.readout = nn.Linear(readout_dim1, num_actions)
-        self.softmax = nn.Softmax(dim=1)
-    def forward(self, X:torch.Tensor):
-        """Return softmax scores of shape (batch, num_actions).
-
-        Args:
-            X: 5D tensor; axes 0 and 1 (batch and frames) are merged so every
-                frame passes through the 2D CNN independently.
-        """
-        batch, frames = X.shape[0], X.shape[1]
-        # Fold frames into the batch axis for the 2D convolutions.
-        Y = X.reshape((batch * frames, X.shape[2], X.shape[3], X.shape[4]))
-        Y = self.conv1(Y)
-        Y = self.conv2(Y)
-        Y = self.conv3(Y)
-        # Back to one flattened feature vector per frame.
-        Y = Y.reshape((batch, frames, Y.shape[1] * Y.shape[2] * Y.shape[3]))
-        Y = self.attention(Y)
-        Y = self.flatten(Y)
-        Y = self.readout(Y)
-        Y = self.softmax(Y)
-        return Y
-class model005(nn.Module):
-    """Per-frame 2D CNN (four ``Conv2DBlock`` stages), self-attention over frames, readout.
-
-    ``forward`` accepts only inputs of shape
-    (batch, sequence_length, 3, 300, 300). The attention layer is built for
-    128 features per frame.
-
-    Args:
-        sequence_length: Expected number of frames; also sizes the readout layer.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        self.sequence_length = sequence_length
-        self.num_actions = num_actions
-        self.input_size = (300, 300)
-        self.embed = nn.Embedding(sequence_length, 1000)    # not used by forward()
-        self.conv1 = Conv2DBlock(3, 16, kernel_size_conv=(7, 7), kernel_size_pool=(5, 5), stride=(4, 4), padding_conv=1, p_dropout=0.2)
-        self.conv2 = Conv2DBlock(16, 32, kernel_size_conv=(7, 7), kernel_size_pool=(5, 5), stride=(3, 3), p_dropout=0.2)
-        self.conv3 = Conv2DBlock(32, 64, kernel_size_conv=(5, 5), kernel_size_pool=(3, 3), stride=(2, 2), p_dropout=0.2)
-        self.conv4 = Conv2DBlock(64, 128, kernel_size_conv=(5, 5), kernel_size_pool=(3, 3), stride=(2, 2), p_dropout=0.2)
-        self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=128)
-        self.flatten = nn.Flatten(start_dim=1)
-        readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-        self.readout = nn.Linear(readout_dim1, num_actions)
-        self.softmax = nn.Softmax(dim=1)
-        self.dropout = nn.Dropout(p = 0.2)
-    def forward(self, X:torch.Tensor):
-        """Return softmax scores of shape (batch, num_actions).
-
-        Raises:
-            AssertionError: If ``X`` does not have the expected shape.
-        """
-        assert X.shape[1] == self.sequence_length and X.shape[2] == 3 and X.shape[3] == self.input_size[0] and X.shape[4] == self.input_size[1],\
-            f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=3, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-        batch, frames = X.shape[0], X.shape[1]
-        # Fold frames into the batch axis so each frame is convolved on its own.
-        Y = X.reshape((batch * frames, X.shape[2], X.shape[3], X.shape[4]))
-        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
-            Y = stage(Y)
-        # One flattened feature vector per frame.
-        Y = Y.reshape((batch, frames, Y.shape[1] * Y.shape[2] * Y.shape[3]))
-        Y = self.flatten(self.attention(Y))
-        Y = self.readout(self.dropout(Y))
-        # NOTE(review): dropout is applied a second time, to the readout output,
-        # right before the softmax; confirm this is intended.
-        return self.softmax(self.dropout(Y))
-class model006(nn.Module):
-    """Four-channel per-frame 2D CNN, 32-head self-attention over frames, readout.
-
-    ``forward`` accepts only inputs of shape
-    (batch, sequence_length, 4, 300, 300). The attention layer is built for
-    128 features per frame and uses d_q = d_k = 4, d_v = 8.
-
-    Args:
-        sequence_length: Expected number of frames; also sizes the readout layer.
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        self.sequence_length = sequence_length
-        self.num_actions = num_actions
-        self.input_size = (300, 300)
-        self.conv1 = Conv2DBlock(4, 16, kernel_size_conv=(7, 7), kernel_size_pool=(5, 5), stride=(4, 4), padding_conv=1, p_dropout=0.2)
-        self.conv2 = Conv2DBlock(16, 32, kernel_size_conv=(7, 7), kernel_size_pool=(5, 5), stride=(3, 3), p_dropout=0.2)
-        self.conv3 = Conv2DBlock(32, 64, kernel_size_conv=(5, 5), kernel_size_pool=(3, 3), stride=(2, 2), p_dropout=0.2)
-        self.conv4 = Conv2DBlock(64, 128, kernel_size_conv=(5, 5), kernel_size_pool=(3, 3), stride=(2, 2), p_dropout=0.2)
-        self.attention = MultiHeadSelfAttention(num_heads=32, embed_dim=128, d_q = 4, d_k = 4, d_v = 8)
-        self.flatten = nn.Flatten(start_dim=1)
-        readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-        self.readout = nn.Linear(readout_dim1, num_actions)
-        self.softmax = nn.Softmax(dim=1)
-        self.dropout = nn.Dropout(p = 0.2)
-    def forward(self, X:torch.Tensor):
-        """Return softmax scores of shape (batch, num_actions).
-
-        Raises:
-            AssertionError: If ``X`` does not have the expected shape.
-        """
-        assert X.shape[1] == self.sequence_length and X.shape[2] == 4 and X.shape[3] == self.input_size[0] and X.shape[4] == self.input_size[1],\
-            f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=4, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-        batch, frames = X.shape[0], X.shape[1]
-        # Fold frames into the batch axis so each frame is convolved on its own.
-        Y = X.reshape((batch * frames, X.shape[2], X.shape[3], X.shape[4]))
-        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
-            Y = stage(Y)
-        # One flattened feature vector per frame.
-        Y = Y.reshape((batch, frames, Y.shape[1] * Y.shape[2] * Y.shape[3]))
-        Y = self.flatten(self.attention(Y))
-        Y = self.readout(self.dropout(Y))
-        # NOTE(review): dropout is applied a second time, to the readout output,
-        # right before the softmax; confirm this is intended.
-        return self.softmax(self.dropout(Y))
-class model007(nn.Module):
-    """Four ``Conv3DBlock`` stages with max-pooled skip connections, readout and softmax.
-
-    The output of stage 1 is pooled and concatenated (channel axis) onto the
-    output of stage 2 before stage 3 (64 + 32 = 96 channels). The outputs of
-    stages 1 and 2 are pooled and concatenated onto the output of stage 3
-    before stage 4 (192 + 32 + 64 = 288 channels). The readout expects 2700
-    flattened features. ``input_size`` is stored but not checked by ``forward``.
-
-    Args:
-        sequence_length: Channel count of the first stage (axis 1 of the input).
-        num_actions: Number of output classes.
-    """
-    def __init__(
-            self,
-            sequence_length = 30,
-            num_actions:int = 10
-        ):
-        super().__init__()
-        self.sequence_length = sequence_length
-        self.num_actions = num_actions
-        self.input_size = (300, 300)
-        self.conv1 = Conv3DBlock(sequence_length, 32, kernel_size_conv=(2, 7, 7), kernel_size_pool=(1, 7, 7), stride=(1, 5, 5), p_dropout=0.2)
-        self.conv2 = Conv3DBlock(32, 64, kernel_size_conv=(2, 5, 5), kernel_size_pool=(1, 5, 5), stride=(1, 3, 3), p_dropout=0.2)
-        self.conv3 = Conv3DBlock(96, 192, kernel_size_conv=(2, 5, 5), kernel_size_pool=(1, 3, 3), stride=(1, 2, 2), p_dropout=0.2)
-        self.conv4 = Conv3DBlock(288, 675, kernel_size_conv=(1, 5, 5), kernel_size_pool=(1, 2, 2), stride=(1, 2, 2), p_dropout=0.2)
-        # Skip-connection pools; "downsampleAB" brings the output of stage A to the input size of stage B.
-        self.downsample13 = nn.MaxPool3d(kernel_size=(2,7,7), stride=(1,3,3))
-        self.downsample14 = nn.MaxPool3d(kernel_size=(2,9,9), stride=(2,8,8))
-        self.downsample24 = nn.MaxPool3d(kernel_size=(2,7,7), stride=(2,2,2))
-        self.flatten = nn.Flatten(start_dim = 1)
-        self.readout = nn.Linear(2700, num_actions)
-        self.relu = nn.LeakyReLU()             # not used by forward()
-        self.dropout = nn.Dropout(p = 0.5)     # not used by forward()
-        self.softmax = nn.Softmax(dim = 1)
-    def forward(self, X):
-        """Return softmax scores of shape (batch, num_actions) for ``X``."""
-        out1 = self.conv1(X)
-        out2 = self.conv2(out1)
-        # Stage 3 sees stage 2 plus the pooled stage-1 output.
-        stage3_in = torch.cat([out2, self.downsample13(out1)], dim=1)
-        out3 = self.conv3(stage3_in)
-        # Stage 4 sees stage 3 plus the pooled outputs of stages 1 and 2.
-        stage4_in = torch.cat([out3, self.downsample14(out1), self.downsample24(out2)], dim=1)
-        out4 = self.conv4(stage4_in)
-        logits = self.readout(self.flatten(out4))
-        return self.softmax(logits)
-class model008(nn.Module):
-    """Four ``Conv3DBlock`` stages with pooled skip connections into every later stage and the head.
-
-    Channel bookkeeping: stage 3 receives 128 + 64 = 192 channels (stage 2 plus
-    pooled stage 1); stage 4 receives 384 + 64 + 128 = 576 channels (stage 3
-    plus pooled stages 1 and 2). The head concatenates stage 4 with pooled
-    stages 1-3 and feeds 15552 flattened features to the readout.
-
-    Args:
-        use_depth_channel: Selects depth-2 instead of depth-1 kernels for ``conv3``,
-            ``downsample24`` and ``downsample2e``.
-        sequence_length: Channel count of the first stage (axis 1 of the input).
-        num_actions: Number of output classes.
-        apply_softmax: If true the output passes through softmax, otherwise through sigmoid.
-    """
-    def __init__(
-            self,
-            use_depth_channel:bool,
-            sequence_length = 30,
-            num_actions:int = 10,
-            apply_softmax:bool = True
-        ):
-        super().__init__()
-        self.sequence_length = sequence_length
-        self.num_actions = num_actions
-        self.use_depth_channel = use_depth_channel
-        # Depth extent of the kernels that depend on the depth-channel switch.
-        depth = 2 if self.use_depth_channel else 1
-        self.conv1 = Conv3DBlock(sequence_length, 64, kernel_size_conv=(2, 7, 7), kernel_size_pool=(1, 7, 7), stride=(1, 5, 5), p_dropout=0.2)
-        self.conv2 = Conv3DBlock(64, 128, kernel_size_conv=(2, 5, 5), kernel_size_pool=(1, 5, 5), stride=(1, 3, 3), p_dropout=0.2)
-        self.conv3 = Conv3DBlock(192, 384, kernel_size_conv=(depth, 5, 5), kernel_size_pool=(1, 3, 3), stride=(1, 2, 2), p_dropout=0.2)
-        self.conv4 = Conv3DBlock(576, 1152, kernel_size_conv=(1, 3, 3), kernel_size_pool=(1, 2, 2), stride=(1, 2, 2), p_dropout=0.2)
-        # Skip-connection pools between stages ("downsampleAB": output of stage A for stage B).
-        self.downsample13 = nn.MaxPool3d(kernel_size=(2,7,7), stride=(1,3,3))
-        self.downsample14 = nn.MaxPool3d(kernel_size=(2,9,9), stride=(2,8,8))
-        self.downsample24 = nn.MaxPool3d(kernel_size=(depth,7,7), stride=(depth,2,2))
-        # Skip-connection pools into the head ("downsampleAe").
-        self.downsample1e = nn.MaxPool3d(kernel_size=(2,28,28), stride=(2,21,21))
-        self.downsample2e = nn.MaxPool3d(kernel_size=(depth,9,9), stride=(1,6,6))
-        self.downsample3e = nn.MaxPool3d(kernel_size=(1,5,5), stride=(1,2,2))
-        self.dropout3d = nn.Dropout3d(p=0.2)    # not used by forward()
-        self.flatten = nn.Flatten(start_dim = 1)
-        self.readout = nn.Linear(15552, num_actions)
-        self.relu = nn.LeakyReLU()              # not used by forward()
-        self.dropout = nn.Dropout(p = 0.2)      # not used by forward()
-        self.softmax = nn.Softmax(dim = 1)
-        self.sigmoid = nn.Sigmoid()
-        self.apply_softmax = apply_softmax
-    def forward(self, X):
-        """Return per-class scores of shape (batch, num_actions) for ``X``."""
-        out1 = self.conv1(X)
-        out2 = self.conv2(out1)
-        # Stage 3 sees stage 2 plus the pooled stage-1 output.
-        stage3_in = torch.cat([out2, self.downsample13(out1)], dim=1)
-        out3 = self.conv3(stage3_in)
-        # Stage 4 sees stage 3 plus the pooled outputs of stages 1 and 2.
-        stage4_in = torch.cat([out3, self.downsample14(out1), self.downsample24(out2)], dim=1)
-        out4 = self.conv4(stage4_in)
-        # The head sees stage 4 plus the pooled outputs of all earlier stages.
-        head_in = torch.cat(
-            [out4, self.downsample1e(out1), self.downsample2e(out2), self.downsample3e(out3)],
-            dim=1,
-        )
-        logits = self.readout(self.flatten(head_in))
-        activation = self.softmax if self.apply_softmax else self.sigmoid
-        return activation(logits)
-if __name__ == '__main__':
-    # Smoke test: push one random batch through model003 on the CPU.
-    batch_size, seq_len, embed_dim = 4, 30, 3
-    image_size = (400, 40)
-    # The random input is drawn before the model is built, as before.
-    X = torch.rand((batch_size, seq_len, 3, *image_size))
-    model3 = model003()
-    model3.to('cpu')
-    X = X.to('cpu')
-    Y = model3(X)

tuc-ar.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f928b8a21f5d7089395bb6f51e7556f7a0c0fa22951709016ff09bc9e1ac68d
-size 41698458

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2f4e0ef9758d615a19ce51780930d93b79585aafd4124a9a44cfba690308681
+size 41739558

ucf101.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d743f2b218846ef6ad770e3f4efcd95e2ba852e121cb67194381c311ece23405
-size 40739610