atiwari751 committed on
Commit
15b2f03
·
unverified ·
2 Parent(s): d79160e 6e2d47c

Merge pull request #2 from shrits-ai/main

Browse files
.gitignore CHANGED
@@ -3,4 +3,6 @@ data/
3
  __pycache__
4
  ResNet 50_Model.xlsx
5
  ~$ResNet 50_Model.xlsx
 
 
6
 
 
3
  __pycache__
4
  ResNet 50_Model.xlsx
5
  ~$ResNet 50_Model.xlsx
6
+ checkpoint.pth
7
+
8
 
README.md CHANGED
@@ -6,6 +6,32 @@
6
 
7
  ## Data Augmentations
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  ## Model Results
 
6
 
7
  ## Data Augmentations
8
 
9
+ To enhance the model's robustness and generalization capabilities, we apply a series of data augmentations to the training dataset. These augmentations are inspired by the original ResNet paper and implemented using the albumentations library. The augmentations include random resized cropping, horizontal flipping, and color jittering, followed by normalization. These transformations help the model learn invariant features and improve performance on unseen data.
10
+
11
+ ### Augmentations and Hyperparameters
12
+
13
+ 1. **Random Resized Crop:**
14
+ - Height: 224
15
+ - Width: 224
16
+ - Scale: (0.08, 1.0)
17
+ - Aspect Ratio: (3/4, 4/3)
18
+ - Probability: 1.0
19
+
20
+ 2. **Horizontal Flip:**
21
+ - Probability: 0.5
22
+
23
+ 3. **Color Jitter:**
24
+ - Brightness: 0.4
25
+ - Contrast: 0.4
26
+ - Saturation: 0.4
27
+ - Hue: 0.1
28
+ - Probability: 0.8
29
+
30
+ 4. **Normalization:**
31
+ - Mean: (0.485, 0.456, 0.406)
32
+ - Standard Deviation: (0.229, 0.224, 0.225)
33
+
34
+ These augmentations are applied only to the training dataset. The test dataset is only resized to 256x256, center-cropped to 224x224, and normalized with the same mean and standard deviation, so that evaluation metrics stay consistent.
35
 
36
 
37
  ## Model Results
resnet_execute.py CHANGED
@@ -8,39 +8,55 @@ from resnet_model import ResNet50
8
  from tqdm import tqdm
9
  from torchvision import datasets
10
  from checkpoint import save_checkpoint, load_checkpoint
 
 
 
 
 
 
11
 
12
  # Define transformations
13
- transform = transforms.Compose([
14
- transforms.Resize(256), # Resize the smaller side to 256 pixels while keeping aspect ratio
15
- transforms.CenterCrop(224), # Then crop to 224x224 pixels from the center
16
- transforms.ToTensor(),
17
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # ImageNet normalization
 
 
 
 
 
 
 
 
18
  ])
19
 
20
  # Train dataset and loader
21
- trainset = datasets.ImageFolder(root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train', transform=transform)
22
- trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=16, pin_memory=True)
23
 
24
- testset = datasets.ImageFolder(root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/val', transform=transform )
25
- testloader = DataLoader(testset, batch_size=1000, shuffle=False, num_workers=16, pin_memory=True)
26
 
27
  # Initialize model, loss function, and optimizer
28
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
29
  model = ResNet50()
30
  model = torch.nn.DataParallel(model)
31
  model = model.to(device)
 
32
 
33
  criterion = nn.CrossEntropyLoss()
34
  optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
35
 
36
  # Training function
37
  from torch.amp import autocast
38
- from tqdm import tqdm
39
 
40
  def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
41
  model.train()
42
  running_loss = 0.0
43
- correct = 0
 
44
  total = 0
45
  pbar = tqdm(train_loader)
46
 
@@ -58,24 +74,28 @@ def train(model, device, train_loader, optimizer, criterion, epoch, accumulation
58
  optimizer.zero_grad()
59
 
60
  running_loss += loss.item() * accumulation_steps
61
- _, predicted = outputs.max(1)
62
  total += targets.size(0)
63
- correct += predicted.eq(targets).sum().item()
 
64
 
65
- pbar.set_description(desc=f'Epoch {epoch} | Loss: {running_loss / (batch_idx + 1):.4f} | Accuracy: {100. * correct / total:.2f}%')
66
 
67
  if (batch_idx + 1) % 50 == 0:
68
  torch.cuda.empty_cache()
69
 
70
- return 100. * correct / total
71
-
72
 
73
  # Testing function
74
  def test(model, device, test_loader, criterion):
75
  model.eval()
76
  test_loss = 0
77
- correct = 0
 
78
  total = 0
 
 
 
79
 
80
  with torch.no_grad():
81
  for inputs, targets in test_loader:
@@ -84,13 +104,22 @@ def test(model, device, test_loader, criterion):
84
  loss = criterion(outputs, targets)
85
 
86
  test_loss += loss.item()
87
- _, predicted = outputs.max(1)
88
  total += targets.size(0)
89
- correct += predicted.eq(targets).sum().item()
 
 
 
 
 
 
 
 
90
 
91
- test_accuracy = 100.*correct/total
92
- print(f'Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {test_accuracy:.2f}%')
93
- return test_accuracy, test_loss/len(test_loader)
 
94
 
95
  # Main execution
96
  if __name__ == '__main__':
@@ -105,10 +134,19 @@ if __name__ == '__main__':
105
  except FileNotFoundError:
106
  print("No checkpoint found, starting from scratch.")
107
 
108
- for epoch in range(1, 6): # 20 epochs
109
- train_accuracy = train(model, device, trainloader, optimizer, criterion, epoch)
110
- test_accuracy, test_loss = test(model, device, testloader, criterion)
111
- print(f'Epoch {epoch} | Train Accuracy: {train_accuracy:.2f}% | Test Accuracy: {test_accuracy:.2f}%')
 
 
 
 
 
 
 
 
 
112
  if test_loss < best_loss:
113
  best_loss = test_loss
114
  patience_counter = 0
@@ -119,3 +157,64 @@ if __name__ == '__main__':
119
  if patience_counter >= patience:
120
  print("Early stopping triggered. Training terminated.")
121
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from tqdm import tqdm
9
  from torchvision import datasets
10
  from checkpoint import save_checkpoint, load_checkpoint
11
+ import matplotlib.pyplot as plt
12
+ from torchvision.utils import make_grid
13
+ import albumentations as A
14
+ from albumentations.pytorch import ToTensorV2
15
+ import numpy as np
16
+ from torchsummary import summary
17
 
18
  # Define transformations
19
# Per-channel statistics passed to A.Normalize by both pipelines below.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: random scale/aspect-ratio crop to 224x224, random
# horizontal flip and color jitter, then normalization and ToTensorV2.
train_transform = A.Compose([
    A.RandomResizedCrop(
        height=224,
        width=224,
        scale=(0.08, 1.0),
        ratio=(3 / 4, 4 / 3),
        p=1.0,
    ),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.1,
        p=0.8,
    ),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
])

# Evaluation pipeline: no random steps -- fixed resize to 256x256, center
# crop to 224x224, then the same normalization and ToTensorV2 conversion.
test_transform = A.Compose([
    A.Resize(height=256, width=256),
    A.CenterCrop(height=224, width=224),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
])
33
 
34
  # Train dataset and loader
35
class _AlbumentationsAdapter:
    """Expose an albumentations pipeline through the ``transform(img)`` call shape.

    The pipelines in this file are called with an ``image=`` keyword holding a
    NumPy array and return a mapping whose ``'image'`` entry is the result;
    this adapter performs that conversion for each sample.

    A named class is used instead of a ``lambda`` so the dataset stays
    picklable. DataLoader worker processes started with the ``spawn`` or
    ``forkserver`` method must pickle the dataset, and a lambda cannot be
    pickled. NOTE(review): this also requires the wrapped pipeline itself to
    be picklable -- confirm for the albumentations version in use.
    """

    def __init__(self, pipeline):
        # pipeline: a callable accepting ``image=<ndarray>`` and returning a
        # mapping with an 'image' key (as train_transform/test_transform do).
        self.pipeline = pipeline

    def __call__(self, img):
        # np.array(img) converts the loaded sample (presumably a PIL image
        # from ImageFolder's default loader) into the array form expected.
        return self.pipeline(image=np.array(img))['image']


trainset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train',
    transform=_AlbumentationsAdapter(train_transform),
)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True)

testset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/val',
    transform=_AlbumentationsAdapter(test_transform),
)
testloader = DataLoader(testset, batch_size=500, shuffle=False, num_workers=8, pin_memory=True)
40
 
41
  # Initialize model, loss function, and optimizer
42
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
43
+ print( device )
44
  model = ResNet50()
45
  model = torch.nn.DataParallel(model)
46
  model = model.to(device)
47
+ summary(model, input_size=(3, 224, 224))
48
 
49
  criterion = nn.CrossEntropyLoss()
50
  optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
51
 
52
  # Training function
53
  from torch.amp import autocast
 
54
 
55
  def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
56
  model.train()
57
  running_loss = 0.0
58
+ correct1 = 0
59
+ correct5 = 0
60
  total = 0
61
  pbar = tqdm(train_loader)
62
 
 
74
  optimizer.zero_grad()
75
 
76
  running_loss += loss.item() * accumulation_steps
77
+ _, predicted = outputs.topk(5, 1, True, True)
78
  total += targets.size(0)
79
+ correct1 += predicted[:, :1].eq(targets.view(-1, 1).expand_as(predicted[:, :1])).sum().item()
80
+ correct5 += predicted.eq(targets.view(-1, 1).expand_as(predicted)).sum().item()
81
 
82
+ pbar.set_description(desc=f'Epoch {epoch} | Loss: {running_loss / (batch_idx + 1):.4f} | Top-1 Acc: {100. * correct1 / total:.2f} | Top-5 Acc: {100. * correct5 / total:.2f}')
83
 
84
  if (batch_idx + 1) % 50 == 0:
85
  torch.cuda.empty_cache()
86
 
87
+ return 100. * correct1 / total, 100. * correct5 / total, running_loss / len(train_loader)
 
88
 
89
  # Testing function
90
  def test(model, device, test_loader, criterion):
91
  model.eval()
92
  test_loss = 0
93
+ correct1 = 0
94
+ correct5 = 0
95
  total = 0
96
+ misclassified_images = []
97
+ misclassified_labels = []
98
+ misclassified_preds = []
99
 
100
  with torch.no_grad():
101
  for inputs, targets in test_loader:
 
104
  loss = criterion(outputs, targets)
105
 
106
  test_loss += loss.item()
107
+ _, predicted = outputs.topk(5, 1, True, True)
108
  total += targets.size(0)
109
+ correct1 += predicted[:, :1].eq(targets.view(-1, 1).expand_as(predicted[:, :1])).sum().item()
110
+ correct5 += predicted.eq(targets.view(-1, 1).expand_as(predicted)).sum().item()
111
+
112
+ # Collect misclassified samples
113
+ for i in range(inputs.size(0)):
114
+ if targets[i] not in predicted[i, :1]:
115
+ misclassified_images.append(inputs[i].cpu())
116
+ misclassified_labels.append(targets[i].cpu())
117
+ misclassified_preds.append(predicted[i, :1].cpu())
118
 
119
+ test_accuracy1 = 100. * correct1 / total
120
+ test_accuracy5 = 100. * correct5 / total
121
+ print(f'Test Loss: {test_loss/len(test_loader):.4f}, Top-1 Accuracy: {test_accuracy1:.2f}, Top-5 Accuracy: {test_accuracy5:.2f}')
122
+ return test_accuracy1, test_accuracy5, test_loss / len(test_loader), misclassified_images, misclassified_labels, misclassified_preds
123
 
124
  # Main execution
125
  if __name__ == '__main__':
 
134
  except FileNotFoundError:
135
  print("No checkpoint found, starting from scratch.")
136
 
137
+ # Store results for each epoch
138
+ results = []
139
+ learning_rates = []
140
+
141
+ for epoch in range(1, 26): # 25 epochs
142
+ train_accuracy1, train_accuracy5, train_loss = train(model, device, trainloader, optimizer, criterion, epoch)
143
+ test_accuracy1, test_accuracy5, test_loss, misclassified_images, misclassified_labels, misclassified_preds = test(model, device, testloader, criterion)
144
+ print(f'Epoch {epoch} | Train Top-1 Acc: {train_accuracy1:.2f} | Train Top-5 Acc: {train_accuracy5:.2f} | Test Top-1 Acc: {test_accuracy1:.2f} | Test Top-5 Acc: {test_accuracy5:.2f}')
145
+
146
+ # Append results for this epoch
147
+ results.append((epoch, train_accuracy1, train_accuracy5, test_accuracy1, test_accuracy5, train_loss, test_loss))
148
+ learning_rates.append(optimizer.param_groups[0]['lr'])
149
+
150
  if test_loss < best_loss:
151
  best_loss = test_loss
152
  patience_counter = 0
 
157
  if patience_counter >= patience:
158
  print("Early stopping triggered. Training terminated.")
159
  break
160
+
161
# Show a sample of misclassified images, but only once epoch 25 has been
# reached and only when there is at least one misclassified sample.
if epoch == 25 and misclassified_images:
    print("\nDisplaying some misclassified samples from the last epoch:")
    # At most the first 16 samples, arranged four per row.
    preview = misclassified_images[:16]
    misclassified_grid = make_grid(preview, nrow=4, normalize=True, scale_each=True)
    plt.figure(figsize=(8, 8))
    # permute(1, 2, 0) moves the first axis last before display
    # (presumably channel-first -> channel-last for imshow).
    plt.imshow(misclassified_grid.permute(1, 2, 0))
    plt.title("Misclassified Samples")
    plt.axis('off')
    plt.show()
172
+
173
# Print the Top-1 accuracy results in a tab-separated format.
# Each row of `results` is
#   (epoch, train_top1, train_top5, test_top1, test_top5, train_loss, test_loss),
# so the third field (train top-5) has to be skipped explicitly; unpacking
# it into the test column would print train top-5 under the test header.
print("\nEpoch\tTrain Top-1 Accuracy\tTest Top-1 Accuracy")
for epoch, train_acc1, _train_acc5, test_acc1, *_ in results:
    print(f"{epoch}\t{train_acc1:.2f}\t{test_acc1:.2f}")
177
+
178
# Plotting
# Split the per-epoch result rows into one list per column, in the order
# (epoch, train top-1, train top-5, test top-1, test top-5, train loss, test loss).
(
    epochs,
    train_acc1,
    train_acc5,
    test_acc1,
    test_acc5,
    train_losses,
    test_losses,
) = ([row[col] for row in results] for col in range(7))

# One entry per subplot: (title, y-axis label, [(series, legend label), ...]).
panels = [
    ('Top-1 Accuracy', 'Accuracy',
     [(train_acc1, 'Train Top-1 Acc'), (test_acc1, 'Test Top-1 Acc')]),
    ('Top-5 Accuracy', 'Accuracy',
     [(train_acc5, 'Train Top-5 Acc'), (test_acc5, 'Test Top-5 Acc')]),
    ('Loss', 'Loss',
     [(train_losses, 'Train Loss'), (test_losses, 'Test Loss')]),
    ('Learning Rate', 'Learning Rate',
     [(learning_rates, 'Learning Rate')]),
]

plt.figure(figsize=(12, 8))
# Fill the 2x2 grid in order; every panel shares the 'Epoch' x-axis.
for position, (panel_title, y_label, curves) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    for series, legend_label in curves:
        plt.plot(epochs, series, label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(panel_title)

plt.tight_layout()
plt.show()
tmppl87qjev/_remote_module_non_scriptable.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import *
2
+
3
+ import torch
4
+ import torch.distributed.rpc as rpc
5
+ from torch import Tensor
6
+ from torch._jit_internal import Future
7
+ from torch.distributed.rpc import RRef
8
+ from typing import Tuple # pyre-ignore: unused import
9
+
10
+
11
+ module_interface_cls = None
12
+
13
+
14
def forward_async(self, *args, **kwargs):
    """Start ``_remote_forward`` on the owner of ``self.module_rref`` via RPC.

    ``self.module_rref``, ``self.device`` and ``self.is_device_map_set`` are
    prepended to the caller's positional arguments; keyword arguments are
    passed through as a shallow copy.

    Returns:
        Whatever ``rpc.rpc_async`` returns for the call; it is handed back
        without being waited on.
    """
    call_args = (self.module_rref, self.device, self.is_device_map_set) + args
    call_kwargs = dict(kwargs)
    destination = self.module_rref.owner()
    return rpc.rpc_async(destination, _remote_forward, call_args, call_kwargs)
23
+
24
+
25
def forward(self, *args, **kwargs):
    """Run ``_remote_forward`` on the owner of ``self.module_rref`` and wait for it.

    Builds the same call as ``forward_async`` -- ``self.module_rref``,
    ``self.device`` and ``self.is_device_map_set`` followed by the caller's
    positional arguments, plus a shallow copy of the keyword arguments --
    then blocks on the result.

    Returns:
        The value produced by calling ``wait()`` on the ``rpc.rpc_async`` result.
    """
    call_args = (self.module_rref, self.device, self.is_device_map_set) + args
    call_kwargs = dict(kwargs)
    destination = self.module_rref.owner()
    pending = rpc.rpc_async(destination, _remote_forward, call_args, call_kwargs)
    return pending.wait()
35
+
36
+
37
+ _generated_methods = [
38
+ forward_async,
39
+ forward,
40
+ ]
41
+
42
+
43
+
44
+
45
def _remote_forward(
        module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
    """Call ``forward`` on the module held locally by ``module_rref``.

    Args:
        module_rref: reference whose ``local_value()`` is the module to run.
        device: device string; converted with ``torch.device``.
        is_device_map_set: when true, the forward output is returned
            unchanged; otherwise Tensor outputs are moved to CPU first.
        *args, **kwargs: forwarded to ``module.forward``.

    Returns:
        For a non-cuda device, or when ``is_device_map_set`` is true, the
        raw result of ``module.forward``. Otherwise a tuple built by
        iterating that result, with every Tensor element moved to CPU.
    """
    module = module_rref.local_value()
    device = torch.device(device)

    # Non-cuda module: nothing needs to be moved in either direction.
    if device.type != "cuda":
        return module.forward(*args, **kwargs)

    # The module is on a cuda device, so move any Tensor in args or kwargs
    # to that device. The tuple is grown by concatenation rather than with a
    # generator expression because torch script does not support generator
    # expressions.
    moved_args: Tuple[()] = ()
    for positional in (*args,):
        if isinstance(positional, Tensor):
            positional = positional.to(device)
        moved_args = moved_args + (positional,)

    # Work on a copy so the incoming mapping is not modified.
    moved_kwargs = {**kwargs}
    for key, value in kwargs.items():
        if isinstance(value, Tensor):
            moved_kwargs[key] = value.to(device)

    if is_device_map_set:
        return module.forward(*moved_args, **moved_kwargs)

    # If the device map is empty, then only CPU tensors are allowed to be
    # sent over the wire, so move any Tensor in the output to CPU. Again
    # built by concatenation for the torch script reason given above.
    # NOTE(review): this iterates the forward output, so a bare Tensor
    # result would be split along its first dimension -- confirm that the
    # wrapped module returns a tuple.
    cpu_outputs: Tuple[()] = ()
    for item in module.forward(*moved_args, **moved_kwargs):
        if isinstance(item, Tensor):
            item = item.cpu()
        cpu_outputs = cpu_outputs + (item,)
    return cpu_outputs