Robert Schulz committed on
Commit b5a4cca · 1 Parent(s): 2511aa0

commit files to HF hub

Files changed (3)
  1. model.py +0 -731
  2. tuc-ar.pth +2 -2
  3. ucf101.pth +0 -3
model.py DELETED
@@ -1,731 +0,0 @@
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- from torchvision.models import resnet50
-
- class Conv2DBlock(nn.Module):
-     def __init__(
-         self,
-         in_channels: int,
-         out_channels: int,
-         kernel_size_conv: tuple[int, int],
-         kernel_size_pool: tuple[int, int],
-         stride: tuple[int, int],
-         padding_conv: int = 0,
-         p_dropout: float = 0.5
-     ):
-         super(Conv2DBlock, self).__init__()
-
-         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size_conv, padding=padding_conv)
-         self.pool = nn.MaxPool2d(kernel_size=kernel_size_pool, stride=stride)
-         self.dropout = nn.Dropout2d(p_dropout)
-         self.relu = nn.LeakyReLU()
-
-     def forward(self, X):
-         Y = self.conv(X)
-         Y = self.pool(Y)
-         Y = self.dropout(Y)
-         Y = self.relu(Y)
-
-         return Y
-
- class Conv3DBlock(nn.Module):
-     def __init__(
-         self,
-         in_channels: int,
-         out_channels: int,
-         kernel_size_conv: tuple[int, int, int],
-         kernel_size_pool: tuple[int, int, int],
-         stride: tuple[int, int, int],
-         padding_conv: int = 0,
-         p_dropout: float = 0.5
-     ):
-         super(Conv3DBlock, self).__init__()
-
-         self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=kernel_size_conv, padding=padding_conv)
-         self.pool = nn.MaxPool3d(kernel_size=kernel_size_pool, stride=stride)
-         self.dropout = nn.Dropout3d(p_dropout)
-         self.batchnorm = nn.BatchNorm3d(out_channels)
-         self.relu = nn.LeakyReLU()
-
-     def forward(self, X):
-         Y = self.conv(X)
-         Y = self.pool(Y)
-         Y = self.batchnorm(Y)
-         Y = self.dropout(Y)
-         Y = self.relu(Y)
-
-         return Y
-
- class SelfAttention(nn.Module):
-     def __init__(
-         self,
-         d_q: int = 2,
-         d_k: int = 2,
-         d_v: int = 4,
-         embed_dim: int = 3
-     ):
-         super().__init__()
-
-         self.d_q = d_q
-         self.d_k = d_k
-         self.d_v = d_v
-
-         self.W_q = nn.Parameter(torch.rand(embed_dim, d_q))
-         self.W_k = nn.Parameter(torch.rand(embed_dim, d_k))
-         self.W_v = nn.Parameter(torch.rand(embed_dim, d_v))
-
-     def forward(self, X):
-         Z = []
-         # iterate over batch_size
-         for x in X:
-             Q = x @ self.W_q  # queries
-             K = x @ self.W_k  # keys
-             V = x @ self.W_v  # values
-
-             omega = Q @ K.T  # omega ... unnormalized attention weights
-             alpha = F.softmax(omega / self.d_k**0.5, dim=0)  # alpha ... normalized attention weights
-             z = alpha @ V  # z ... context vectors -> attention-weighted version of the original query input x_i
-             Z.append(z)
-
-         Z = torch.stack(Z)
-         return Z
-
- class MultiHeadSelfAttention(nn.Module):
-     def __init__(
-         self,
-         num_heads: int,
-         d_q: int = 2,
-         d_k: int = 2,
-         d_v: int = 4,
-         embed_dim: int = 3
-     ):
-         super().__init__()
-
-         self.d_q = d_q
-         self.d_k = d_k
-         self.d_v = d_v
-
-         self.heads = nn.ModuleList([SelfAttention(d_q, d_k, d_v, embed_dim) for _ in range(num_heads)])
-
-     def forward(self, X):
-         # run each head independently and concatenate along the feature dimension
-         return torch.cat([head(X) for head in self.heads], dim=-1)
-
- class model001(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super(model001, self).__init__()
-
-         self.conv1 = nn.Conv3d(sequence_length, 64, kernel_size=(2, 7, 7))
-         self.maxPool1 = nn.MaxPool3d(kernel_size=(1, 7, 7), stride=(1, 5, 5))
-         self.batchnorm1 = nn.BatchNorm3d(64)
-
-         self.conv2 = nn.Conv3d(64, 96, kernel_size=(2, 5, 5))
-         self.maxPool2 = nn.MaxPool3d(kernel_size=(1, 5, 5), stride=(1, 3, 3))
-         self.batchnorm2 = nn.BatchNorm3d(96)
-
-         self.conv3 = nn.Conv3d(96, 128, kernel_size=(2, 5, 5))
-         self.maxPool3 = nn.MaxPool3d(kernel_size=(1, 5, 5), stride=(1, 3, 3))
-         self.batchnorm3 = nn.BatchNorm3d(128)
-
-         self.flatten = nn.Flatten()
-         self.readout = nn.Linear(4608, num_actions)
-
-         self.dropout1d = nn.Dropout1d(p = 0.2)
-         self.dropout3d = nn.Dropout3d(p = 0.2)
-
-         self.relu = nn.ReLU()
-         self.softmax = nn.Softmax(dim = 1)
-         self.sigmoid = nn.Sigmoid()
-         self.num_actions = num_actions
-
-     def forward(self, X):
-         #X = X.permute(0, 2, 1, 3, 4)
-         Y = X
-
-         Y = self.conv1(Y)
-         Y = self.maxPool1(Y)
-         Y = self.batchnorm1(Y)
-         Y = self.dropout3d(Y)
-         Y = self.relu(Y)
-
-         Y = self.conv2(Y)
-         Y = self.maxPool2(Y)
-         Y = self.batchnorm2(Y)
-         Y = self.dropout3d(Y)
-         Y = self.relu(Y)
-
-         Y = self.conv3(Y)
-         Y = self.maxPool3(Y)
-         Y = self.batchnorm3(Y)
-         Y = self.dropout3d(Y)
-         Y = self.relu(Y)
-
-         Y = self.flatten(Y)
-
-         Y = self.readout(Y)
-         Y = self.dropout1d(Y)
-         Y = self.softmax(Y)
-         #Y = self.sigmoid(Y)
-
-         return Y
-
- class model002(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super(model002, self).__init__()
-
-         self.sequence_length = sequence_length
-         self.input_size = (400, 400)
-
-         self.conv1 = Conv3DBlock(
-             in_channels = sequence_length,
-             out_channels = 64,
-             kernel_size_conv = (2, 7, 7),
-             kernel_size_pool = (1, 7, 7),
-             stride = (1, 5, 5)
-         )
-         self.conv2 = Conv3DBlock(
-             in_channels = 64,
-             out_channels = 96,
-             kernel_size_conv = (2, 5, 5),
-             kernel_size_pool = (1, 5, 5),
-             stride = (1, 3, 3)
-         )
-         self.conv3 = Conv3DBlock(
-             in_channels = 96,
-             out_channels = 128,
-             kernel_size_conv = (2, 5, 5),
-             kernel_size_pool = (1, 5, 5),
-             stride = (1, 3, 3)
-         )
-         self.conv4 = Conv3DBlock(
-             in_channels = 128,
-             out_channels = 160,
-             kernel_size_conv = (1, 3, 3),
-             kernel_size_pool = (1, 3, 3),
-             stride = (1, 2, 2)
-         )
-         self.flatten = nn.Flatten(start_dim=1)
-         self.dropout = nn.Dropout()
-         self.readout = nn.Linear(160, num_actions)
-         self.softmax = nn.Softmax(dim=1)
-         self.num_actions = num_actions
-
-     def forward(self, X):
-         assert X.shape[1] == self.sequence_length and X.shape[2] == 4 and X.shape[3] == self.input_size[0] and X.shape[4] == self.input_size[1], \
-             f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=4, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-         Y = X
-
-         Y = self.conv1(Y)
-         #print(Y.shape)
-         Y = self.conv2(Y)
-         #print(Y.shape)
-         Y = self.conv3(Y)
-         #print(Y.shape)
-         Y = self.conv4(Y)
-         #print(Y.shape)
-         Y = self.flatten(Y)
-         Y = self.dropout(Y)
-         #print(Y.shape)
-         Y = self.readout(Y)
-
-         Y = self.softmax(Y)
-         return Y
-
- class model003(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super(model003, self).__init__()
-
-         self.embed = resnet50(weights='DEFAULT')
-
-         self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=1000)
-         self.flatten = nn.Flatten(start_dim=1)
-
-         readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-         self.readout = nn.Linear(readout_dim1, num_actions)
-         self.softmax = nn.Softmax(dim=1)
-         self.num_actions = num_actions
-
-     def forward(self, X):
-         # embed each sequence of frames with the pretrained ResNet-50, without backpropagating into it
-         embeddings = []
-         for x in X:
-             with torch.no_grad():
-                 embedded = self.embed(x)
-             embeddings.append(embedded)
-         embeddings = torch.stack(embeddings)
-
-         Y = self.attention(embeddings)
-         Y = self.flatten(Y)
-         Y = self.readout(Y)
-         Y = self.softmax(Y)
-         return Y
-
- class model004(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super().__init__()
-         self.sequence_length = sequence_length
-         self.num_actions = num_actions
-
-         self.embed = nn.Embedding(sequence_length, 256)
-
-         self.conv1 = Conv2DBlock(
-             in_channels = 3,
-             out_channels = 16,
-             kernel_size_conv = (9, 9),
-             kernel_size_pool = (7, 7),
-             stride = (5, 5),
-             padding_conv = 1,
-             p_dropout = 0
-         )
-         self.conv2 = Conv2DBlock(
-             in_channels = 16,
-             out_channels = 32,
-             kernel_size_conv = (7, 7),
-             kernel_size_pool = (5, 5),
-             stride = (3, 3),
-             p_dropout = 0
-         )
-         self.conv3 = Conv2DBlock(
-             in_channels = 32,
-             out_channels = 64,
-             kernel_size_conv = (5, 5),
-             kernel_size_pool = (3, 3),
-             stride = (2, 2),
-             p_dropout = 0
-         )
-         # self.conv4 = Conv2DBlock(
-         #     in_channels = 64,
-         #     out_channels = 128,
-         #     kernel_size_conv = (5, 5),
-         #     kernel_size_pool = (3, 3),
-         #     stride = (2, 2)
-         # )
-
-         self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=960)
-         self.flatten = nn.Flatten(start_dim=1)
-
-         readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-         self.readout = nn.Linear(readout_dim1, num_actions)
-         self.softmax = nn.Softmax(dim=1)
-
-     def forward(self, X: torch.Tensor):
-         # fold the sequence dimension into the batch dimension for the 2D conv blocks
-         Y = X.reshape((X.shape[0] * X.shape[1], X.shape[2], X.shape[3], X.shape[4]))
-         #print(Y.shape)
-         Y = self.conv1(Y)
-         #print(Y.shape)
-         Y = self.conv2(Y)
-         #print(Y.shape)
-         Y = self.conv3(Y)
-         #print(Y.shape)
-         #Y = self.conv4(Y)
-         #print(Y.shape)
-         Y = Y.reshape((X.shape[0], X.shape[1], Y.shape[1] * Y.shape[2] * Y.shape[3]))
-         #print(Y.shape)
-         Y = self.attention(Y)
-         #print(Y.shape)
-         Y = self.flatten(Y)
-         #print(Y.shape)
-         Y = self.readout(Y)
-         Y = self.softmax(Y)
-         return Y
-
- class model005(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super().__init__()
-         self.sequence_length = sequence_length
-         self.num_actions = num_actions
-         self.input_size = (300, 300)
-
-         self.embed = nn.Embedding(sequence_length, 1000)
-
-         self.conv1 = Conv2DBlock(
-             in_channels = 3,
-             out_channels = 16,
-             kernel_size_conv = (7, 7),
-             kernel_size_pool = (5, 5),
-             stride = (4, 4),
-             padding_conv = 1,
-             p_dropout = 0.2
-         )
-         self.conv2 = Conv2DBlock(
-             in_channels = 16,
-             out_channels = 32,
-             kernel_size_conv = (7, 7),
-             kernel_size_pool = (5, 5),
-             stride = (3, 3),
-             p_dropout = 0.2
-         )
-         self.conv3 = Conv2DBlock(
-             in_channels = 32,
-             out_channels = 64,
-             kernel_size_conv = (5, 5),
-             kernel_size_pool = (3, 3),
-             stride = (2, 2),
-             p_dropout = 0.2
-         )
-         self.conv4 = Conv2DBlock(
-             in_channels = 64,
-             out_channels = 128,
-             kernel_size_conv = (5, 5),
-             kernel_size_pool = (3, 3),
-             stride = (2, 2),
-             p_dropout = 0.2
-         )
-
-         self.attention = MultiHeadSelfAttention(num_heads=16, embed_dim=128)
-         self.flatten = nn.Flatten(start_dim=1)
-
-         readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-         self.readout = nn.Linear(readout_dim1, num_actions)
-         self.softmax = nn.Softmax(dim=1)
-
-         self.dropout = nn.Dropout(p = 0.2)
-
-     def forward(self, X: torch.Tensor):
-         assert X.shape[1] == self.sequence_length and X.shape[2] == 3 and X.shape[3] == self.input_size[0] and X.shape[4] == self.input_size[1], \
-             f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=3, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-         Y = X.reshape((X.shape[0] * X.shape[1], X.shape[2], X.shape[3], X.shape[4]))
-         #print(Y.shape)
-         Y = self.conv1(Y)
-         #print(Y.shape)
-         Y = self.conv2(Y)
-         #print(Y.shape)
-         Y = self.conv3(Y)
-         #print(Y.shape)
-         Y = self.conv4(Y)
-         #print(Y.shape)
-         Y = Y.reshape((X.shape[0], X.shape[1], Y.shape[1] * Y.shape[2] * Y.shape[3]))
-         #print(Y.shape)
-         Y = self.attention(Y)
-         #print(Y.shape)
-         Y = self.flatten(Y)
-         Y = self.dropout(Y)
-         #print(Y.shape)
-         Y = self.readout(Y)
-         Y = self.dropout(Y)
-         Y = self.softmax(Y)
-         return Y
-
- class model006(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super().__init__()
-         self.sequence_length = sequence_length
-         self.num_actions = num_actions
-         self.input_size = (300, 300)
-
-         #self.embed = nn.Embedding(sequence_length, 1000)
-
-         self.conv1 = Conv2DBlock(
-             in_channels = 4,
-             out_channels = 16,
-             kernel_size_conv = (7, 7),
-             kernel_size_pool = (5, 5),
-             stride = (4, 4),
-             padding_conv = 1,
-             p_dropout = 0.2
-         )
-         self.conv2 = Conv2DBlock(
-             in_channels = 16,
-             out_channels = 32,
-             kernel_size_conv = (7, 7),
-             kernel_size_pool = (5, 5),
-             stride = (3, 3),
-             p_dropout = 0.2
-         )
-         self.conv3 = Conv2DBlock(
-             in_channels = 32,
-             out_channels = 64,
-             kernel_size_conv = (5, 5),
-             kernel_size_pool = (3, 3),
-             stride = (2, 2),
-             p_dropout = 0.2
-         )
-         self.conv4 = Conv2DBlock(
-             in_channels = 64,
-             out_channels = 128,
-             kernel_size_conv = (5, 5),
-             kernel_size_pool = (3, 3),
-             stride = (2, 2),
-             p_dropout = 0.2
-         )
-
-         self.attention = MultiHeadSelfAttention(num_heads=32, embed_dim=128, d_q = 4, d_k = 4, d_v = 8)
-         self.flatten = nn.Flatten(start_dim=1)
-
-         readout_dim1 = sequence_length * len(self.attention.heads) * self.attention.d_v
-         self.readout = nn.Linear(readout_dim1, num_actions)
-         self.softmax = nn.Softmax(dim=1)
-
-         self.dropout = nn.Dropout(p = 0.2)
-
-     def forward(self, X: torch.Tensor):
-         assert X.shape[1] == self.sequence_length and X.shape[2] == 4 and X.shape[3] == self.input_size[0] and X.shape[4] == self.input_size[1], \
-             f'Expected input shape (batch_size, sequence_length={self.sequence_length}, channels=4, width={self.input_size[0]}, height={self.input_size[1]}), but got ({X.shape})'
-         Y = X.reshape((X.shape[0] * X.shape[1], X.shape[2], X.shape[3], X.shape[4]))
-         #print(Y.shape)
-         Y = self.conv1(Y)
-         #print(Y.shape)
-         Y = self.conv2(Y)
-         #print(Y.shape)
-         Y = self.conv3(Y)
-         #print(Y.shape)
-         Y = self.conv4(Y)
-         #print(Y.shape)
-         Y = Y.reshape((X.shape[0], X.shape[1], Y.shape[1] * Y.shape[2] * Y.shape[3]))
-         #print(Y.shape)
-         Y = self.attention(Y)
-         #print(Y.shape)
-         Y = self.flatten(Y)
-         Y = self.dropout(Y)
-         #print(Y.shape)
-         Y = self.readout(Y)
-         Y = self.dropout(Y)
-         Y = self.softmax(Y)
-         return Y
-
- class model007(nn.Module):
-     def __init__(
-         self,
-         sequence_length = 30,
-         num_actions: int = 10
-     ):
-         super().__init__()
-         self.sequence_length = sequence_length
-         self.num_actions = num_actions
-         self.input_size = (300, 300)
-
-         self.conv1 = Conv3DBlock(
-             in_channels = sequence_length,
-             out_channels = 32,
-             kernel_size_conv = (2, 7, 7),
-             kernel_size_pool = (1, 7, 7),
-             stride = (1, 5, 5),
-             p_dropout = 0.2
-         )
-         self.conv2 = Conv3DBlock(
-             in_channels = 32,
-             out_channels = 64,
-             kernel_size_conv = (2, 5, 5),
-             kernel_size_pool = (1, 5, 5),
-             stride = (1, 3, 3),
-             p_dropout = 0.2
-         )
-         self.conv3 = Conv3DBlock(
-             in_channels = 96,
-             out_channels = 192,
-             kernel_size_conv = (2, 5, 5),
-             kernel_size_pool = (1, 3, 3),
-             stride = (1, 2, 2),
-             p_dropout = 0.2
-         )
-         self.conv4 = Conv3DBlock(
-             in_channels = 288,
-             out_channels = 675,
-             kernel_size_conv = (1, 5, 5),
-             kernel_size_pool = (1, 2, 2),
-             stride = (1, 2, 2),
-             p_dropout = 0.2
-         )
-
-         # pooled skip connections: downsampleXY feeds the output of block X into the input of block Y
-         self.downsample13 = nn.MaxPool3d(kernel_size=(2, 7, 7), stride=(1, 3, 3))
-         self.downsample14 = nn.MaxPool3d(kernel_size=(2, 9, 9), stride=(2, 8, 8))
-         self.downsample24 = nn.MaxPool3d(kernel_size=(2, 7, 7), stride=(2, 2, 2))
-
-         self.flatten = nn.Flatten(start_dim = 1)
-
-         self.readout = nn.Linear(2700, num_actions)
-
-         self.relu = nn.LeakyReLU()
-         self.dropout = nn.Dropout(p = 0.5)
-         self.softmax = nn.Softmax(dim = 1)
-
-     def forward(self, X):
-         Y = X
-
-         Y1 = self.conv1(Y)
-         Y2 = self.conv2(Y1)
-         Y13 = self.downsample13(Y1)
-         Y14 = self.downsample14(Y1)
-         Y24 = self.downsample24(Y2)
-         Y2_cat = torch.cat([Y2, Y13], dim=1)
-         Y3 = self.conv3(Y2_cat)
-         Y3_cat = torch.cat([Y3, Y14, Y24], dim=1)
-
-         Y4 = self.conv4(Y3_cat)
-
-         Y = self.flatten(Y4)
-
-         # print('X', X.shape)
-         # print('Y1', Y1.shape)
-         # print('Y2', Y2.shape)
-         # print('Y3', Y3.shape)
-         # print('Y4', Y4.shape)
-         # print('Y', Y.shape)
-
-         # print('Y13', Y13.shape)
-         # print('Y14', Y14.shape)
-         # print('Y24', Y24.shape)
-
-         # print('Y2_cat', Y2_cat.shape)
-         # print('Y3_cat', Y3_cat.shape)
-
-         Y = self.readout(Y)
-         Y = self.softmax(Y)
-
-         return Y
-
- class model008(nn.Module):
-     def __init__(
-         self,
-         use_depth_channel: bool,
-         sequence_length = 30,
-         num_actions: int = 10,
-         apply_softmax: bool = True
-     ):
-         super().__init__()
-         self.sequence_length = sequence_length
-         self.num_actions = num_actions
-         self.use_depth_channel = use_depth_channel
-
-         self.conv1 = Conv3DBlock(
-             in_channels = sequence_length,
-             out_channels = 64,
-             kernel_size_conv = (2, 7, 7),
-             kernel_size_pool = (1, 7, 7),
-             stride = (1, 5, 5),
-             p_dropout = 0.2
-         )
-         self.conv2 = Conv3DBlock(
-             in_channels = 64,
-             out_channels = 128,
-             kernel_size_conv = (2, 5, 5),
-             kernel_size_pool = (1, 5, 5),
-             stride = (1, 3, 3),
-             p_dropout = 0.2
-         )
-         self.conv3 = Conv3DBlock(
-             in_channels = 192,
-             out_channels = 384,
-             kernel_size_conv = (2, 5, 5) if self.use_depth_channel else (1, 5, 5),
-             kernel_size_pool = (1, 3, 3),
-             stride = (1, 2, 2),
-             p_dropout = 0.2
-         )
-         self.conv4 = Conv3DBlock(
-             in_channels = 576,
-             out_channels = 1152,
-             kernel_size_conv = (1, 3, 3),
-             kernel_size_pool = (1, 2, 2),
-             stride = (1, 2, 2),
-             p_dropout = 0.2
-         )
-
-         # pooled skip connections: downsampleXY feeds the output of block X into the input of block Y
-         self.downsample13 = nn.MaxPool3d(kernel_size=(2, 7, 7), stride=(1, 3, 3))
-         self.downsample14 = nn.MaxPool3d(kernel_size=(2, 9, 9), stride=(2, 8, 8))
-         if self.use_depth_channel:
-             self.downsample24 = nn.MaxPool3d(kernel_size=(2, 7, 7), stride=(2, 2, 2))
-         else:
-             self.downsample24 = nn.MaxPool3d(kernel_size=(1, 7, 7), stride=(1, 2, 2))
-
-         # pooled skip connections from each conv block straight to the final concatenation before the readout
-         self.downsample1e = nn.MaxPool3d(kernel_size=(2, 28, 28), stride=(2, 21, 21))
-         self.downsample2e = nn.MaxPool3d(kernel_size=(2, 9, 9) if self.use_depth_channel else (1, 9, 9), stride=(1, 6, 6))
-         self.downsample3e = nn.MaxPool3d(kernel_size=(1, 5, 5), stride=(1, 2, 2))
-
-         self.dropout3d = nn.Dropout3d(p=0.2)
-
-         self.flatten = nn.Flatten(start_dim = 1)
-
-         self.readout = nn.Linear(15552, num_actions)
-
-         self.relu = nn.LeakyReLU()
-         self.dropout = nn.Dropout(p = 0.2)
-         self.softmax = nn.Softmax(dim = 1)
-         self.sigmoid = nn.Sigmoid()
-
-         self.apply_softmax = apply_softmax
-
-     def forward(self, X):
-         Y = X
-
-         Y1 = self.conv1(Y)
-         Y2 = self.conv2(Y1)
-         Y13 = self.downsample13(Y1)
-         Y14 = self.downsample14(Y1)
-         Y24 = self.downsample24(Y2)
-         Y2_cat = torch.cat([Y2, Y13], dim=1)
-         Y3 = self.conv3(Y2_cat)
-         Y3_cat = torch.cat([Y3, Y14, Y24], dim=1)
-
-         Y4 = self.conv4(Y3_cat)
-
-         Y1e = self.downsample1e(Y1)
-         Y2e = self.downsample2e(Y2)
-         Y3e = self.downsample3e(Y3)
-
-         Y4_cat = torch.cat([Y4, Y1e, Y2e, Y3e], dim=1)
-
-         Y = self.flatten(Y4_cat)
-
-         # print('X', X.shape)
-         # print('Y1', Y1.shape)
-         # print('Y2', Y2.shape)
-         # print('Y3', Y3.shape)
-         # print('Y4', Y4.shape)
-         # print('Y', Y.shape)
-
-         # print('Y13', Y13.shape)
-         # print('Y14', Y14.shape)
-         # print('Y24', Y24.shape)
-
-         # print('Y2_cat', Y2_cat.shape)
-         # print('Y3_cat', Y3_cat.shape)
-
-         Y = self.readout(Y)
-
-         if self.apply_softmax:
-             Y = self.softmax(Y)
-         else:
-             Y = self.sigmoid(Y)
-
-         return Y
-
- if __name__ == '__main__':
-     batch_size = 4
-     seq_len = 30
-     embed_dim = 3
-     image_size = (400, 40)
-
-     X = torch.rand((batch_size, seq_len, 3, image_size[0], image_size[1]))
-
-     model3 = model003()
-     model3.to('cpu')
-     X = X.to('cpu')
-     Y = model3(X)
tuc-ar.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6f928b8a21f5d7089395bb6f51e7556f7a0c0fa22951709016ff09bc9e1ac68d
- size 41698458
+ oid sha256:d2f4e0ef9758d615a19ce51780930d93b79585aafd4124a9a44cfba690308681
+ size 41739558
ucf101.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d743f2b218846ef6ad770e3f4efcd95e2ba852e121cb67194381c311ece23405
- size 40739610