Spaces:
Runtime error
Runtime error
import math | |
import numpy as np | |
import torch | |
import torch.nn as nn | |
from torch.nn import functional as F | |
from torchvision.ops import roi_align, roi_pool | |
class VQAHead(nn.Module):
    """MLP Regression Head for VQA.

    The MLP is built from two ``nn.Conv3d`` layers with ``(1, 1, 1)`` kernels,
    so it mixes channels independently at every position of the input volume
    and produces a single-channel score map.

    Args:
        in_channels: input channels for MLP
        hidden_channels: hidden channels for MLP
        dropout_ratio: the dropout ratio for features before the MLP
            (default 0.5). A value of 0 disables dropout; ``self.dropout``
            is then ``None``.
        pre_pool: whether pre-pool the features or not (True for Aesthetic
            Attributes, False for Technical Attributes)
        **kwargs: accepted and ignored.
    """

    def __init__(
        self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, pre_pool=False, **kwargs
    ):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.pre_pool = pre_pool
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1))
        self.fc_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1))
        self.gelu = nn.GELU()
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

    def _apply_dropout(self, x):
        """Apply dropout when it is configured; otherwise return ``x`` unchanged."""
        # self.dropout is None when dropout_ratio == 0; calling it directly
        # would raise TypeError, so the disabled case must be a pass-through.
        if self.dropout is None:
            return x
        return self.dropout(x)

    def forward(self, x, rois=None):
        """Compute the quality score map for a feature volume.

        Args:
            x: feature tensor fed to ``nn.Conv3d``; its channel dimension must
                equal ``in_channels``.
            rois: unused; kept for interface compatibility.

        Returns:
            A tensor with one channel. Its remaining dimensions match ``x``
            when ``pre_pool`` is False and are pooled to ``1 x 1 x 1`` when
            ``pre_pool`` is True.
        """
        if self.pre_pool:
            x = self.avg_pool(x)
        x = self._apply_dropout(x)
        hidden = self.gelu(self.fc_hid(x))
        qlt_score = self.fc_last(self._apply_dropout(hidden))
        return qlt_score
class VARHead(nn.Module):
    """Classification Head for Video Action Recognition.

    Applies dropout, global average pooling to ``1 x 1 x 1`` and a single
    ``nn.Conv3d`` layer with a ``(1, 1, 1)`` kernel.

    Args:
        in_channels: input channels for the final layer
        out_channels: output channels of the final layer (default 400)
        dropout_ratio: the dropout ratio for features before the final layer
            (default 0.5). A value of 0 disables dropout; ``self.dropout``
            is then ``None``.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels=768, out_channels=400, dropout_ratio=0.5, **kwargs):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.out_channels = out_channels
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc = nn.Conv3d(self.in_channels, self.out_channels, (1, 1, 1))
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))

    def forward(self, x, rois=None):
        """Compute the pooled output for a feature volume.

        Args:
            x: feature tensor fed to ``nn.Conv3d``; its channel dimension must
                equal ``in_channels``.
            rois: unused; kept for interface compatibility.

        Returns:
            A tensor with ``out_channels`` channels and the pooled dimensions
            reduced to ``1 x 1 x 1``.
        """
        # self.dropout is None when dropout_ratio == 0; skip it instead of
        # calling None (which would raise TypeError).
        if self.dropout is not None:
            x = self.dropout(x)
        x = self.avg_pool(x)
        out = self.fc(x)
        return out
class IQAHead(nn.Module):
    """MLP Regression Head for IQA.

    A two-layer MLP built from ``nn.Linear`` layers with a GELU in between,
    mapping the last dimension of the input from ``in_channels`` to 1.

    Args:
        in_channels: input channels for MLP
        hidden_channels: hidden channels for MLP
        dropout_ratio: the dropout ratio for features before the MLP
            (default 0.5). A value of 0 disables dropout; ``self.dropout``
            is then ``None``.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs
    ):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_hid = nn.Linear(self.in_channels, self.hidden_channels)
        self.fc_last = nn.Linear(self.hidden_channels, 1)
        self.gelu = nn.GELU()

    def _apply_dropout(self, x):
        """Apply dropout when it is configured; otherwise return ``x`` unchanged."""
        # self.dropout is None when dropout_ratio == 0; calling it directly
        # would raise TypeError, so the disabled case must be a pass-through.
        if self.dropout is None:
            return x
        return self.dropout(x)

    def forward(self, x):
        """Compute the quality score for the given features.

        Args:
            x: feature tensor whose last dimension equals ``in_channels``.

        Returns:
            A tensor shaped like ``x`` except that the last dimension is 1.
        """
        x = self._apply_dropout(x)
        hidden = self.gelu(self.fc_hid(x))
        qlt_score = self.fc_last(self._apply_dropout(hidden))
        return qlt_score