# NOTE(review): removed non-Python paste artifacts ("Spaces:" / "Runtime error")
# that preceded the module and made the file unparseable.
import torch
import torch.nn as nn
from transformers import ViTModel

from src.models.segmentation_head import SegmentationHead
class ViTSegmentation(nn.Module):
    """Semantic segmentation with a frozen ViT-B/16 backbone and a trainable head.

    The pretrained ``google/vit-base-patch16-224`` encoder is loaded and frozen;
    only ``segmentation_head`` receives gradients. The head maps the 768-dim
    patch embeddings (reshaped to a 2D grid) to per-pixel class logits.
    """

    def __init__(self, image_size: int = 224, num_classes: int = 9) -> None:
        """Build the model.

        Args:
            image_size: Expected square input resolution. Stored for reference;
                inputs are validated against the backbone's configured size.
            num_classes: Number of output segmentation classes.
        """
        super().__init__()
        # Normalization stats matching the pretrained ViT. Not applied in
        # forward() — presumably used by an external preprocessing pipeline;
        # TODO(review): confirm against the data loader.
        self.mean = [0.5, 0.5, 0.5]
        self.std = [0.5, 0.5, 0.5]
        self.image_size = image_size  # was previously accepted but never stored
        self.backbone = ViTModel.from_pretrained("google/vit-base-patch16-224")
        self.segmentation_head = SegmentationHead(in_channels=768, num_classes=num_classes)
        # Freeze the backbone so only the segmentation head is trained.
        for param in self.backbone.parameters():
            param.requires_grad = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute segmentation logits for a batch of images.

        Args:
            x: Input batch of shape ``(batch, 3, H, W)`` with ``H == W`` equal
                to the backbone's configured image size.

        Returns:
            Segmentation masks produced by ``segmentation_head`` from the
            ``(batch, 768, H/16, W/16)`` patch-feature grid.

        Raises:
            ValueError: If the spatial size does not match the backbone.
        """
        batch_size, _, height, width = x.size()
        # Explicit raise instead of assert: asserts are stripped under -O.
        if not (height == width == self.backbone.config.image_size):
            raise ValueError("The image must match the size required by the ViT model")
        outputs = self.backbone(pixel_values=x).last_hidden_state
        patch_dim = height // self.backbone.config.patch_size  # integer grid size
        outputs = outputs[:, 1:, :]  # drop the [CLS] token; keep patch tokens only
        # (batch, n_patches, 768) -> (batch, 768, patch_dim, patch_dim)
        outputs = outputs.permute(0, 2, 1).view(batch_size, -1, patch_dim, patch_dim)
        masks = self.segmentation_head(outputs)
        return masks
def main() -> None:
    """Instantiate the segmentation model and print its parameter count."""
    model = ViTSegmentation(image_size=224, num_classes=18)
    # Generator expression: no throwaway list is materialized.
    num_params = sum(p.numel() for p in model.parameters())
    print(f"params: {num_params / 1e6:.2f} M")


if __name__ == "__main__":
    main()