from transformers import PreTrainedModel, PretrainedConfig, AutoModel
from transformers.modeling_outputs import ModelOutput
import torch.nn as nn
import torch

class TimesheetEstimatorConfig(PretrainedConfig):
    """Configuration for ``TimesheetEstimator``.

    Stores the name of the encoder checkpoint to load and the width of the
    regression head built on top of it.
    """

    def __init__(
        self,
        encoder_model_name: str = "bert-base-cased",
        hidden_size: int = 768,
        **kwargs,
    ):
        """Build the configuration.

        Args:
            encoder_model_name: Identifier handed to
                ``AutoModel.from_pretrained`` when the model is built.
            hidden_size: Feature width used for the head's linear and
                layer-norm modules. Presumably this must equal the encoder's
                output width (768 for the default encoder) -- TODO confirm.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        # Assigned after the base initializer has run, so the estimator always
        # ends up with exactly one output, whatever was passed in ``kwargs``.
        self.num_labels = 1
        self.hidden_size = hidden_size
        self.encoder_model_name = encoder_model_name

class TimesheetEstimator(PreTrainedModel):
    """Regression model: a pretrained encoder followed by a small head.

    The head is ``Linear -> LayerNorm -> Linear`` applied to the encoder
    output at sequence position 0, producing ``config.num_labels`` values per
    input sequence. Training loss is mean squared error.
    """

    config_class = TimesheetEstimatorConfig

    def __init__(self, config: TimesheetEstimatorConfig):
        """Create the encoder and the regression head described by ``config``.

        Args:
            config: Supplies ``encoder_model_name``, ``hidden_size`` and
                ``num_labels``.
        """
        super().__init__(config)

        # NOTE(review): the encoder checkpoint is loaded on every construction
        # of this class -- confirm that is intended when an already
        # fine-tuned estimator is being restored.
        self.encoder = AutoModel.from_pretrained(config.encoder_model_name)
        # NOTE(review): assumes config.hidden_size equals the encoder's output
        # width; nothing here verifies it.
        self.hidden_layer = nn.Linear(config.hidden_size, config.hidden_size)
        self.norm_layer = nn.LayerNorm(config.hidden_size)
        self.estimate_layer = nn.Linear(config.hidden_size, config.num_labels)
        self.loss = nn.MSELoss()

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and regression head, optionally computing the loss.

        Args:
            input_ids: Token ids passed to the encoder as ``input_ids``.
            attention_mask: Mask passed to the encoder as ``attention_mask``.
            labels: Optional regression targets. They are reshaped to a single
                column and cast to the dtype of the prediction before the
                loss is computed.

        Returns:
            A ``ModelOutput`` with ``loss`` (``None`` when ``labels`` is not
            given) and ``logits`` (the head's output).
        """
        encoder_outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

        # First element of the encoder output; indexed below as a rank-3
        # tensor. Presumably the last hidden state, with position 0 being the
        # [CLS] token for BERT-style encoders -- TODO confirm.
        represent_vectors = encoder_outputs[0]

        hidden = self.hidden_layer(represent_vectors[:, 0, :])
        norm = self.norm_layer(hidden)
        estimate = self.estimate_layer(norm)

        loss = None
        if labels is not None:
            # MSELoss needs prediction and target to share a dtype. Labels
            # often arrive as integers or float64, which would otherwise
            # raise in the loss or in its backward pass. The cast is a no-op
            # when the dtype already matches.
            targets = labels.reshape(-1, 1).to(dtype=estimate.dtype)
            loss = self.loss(estimate, targets)

        return ModelOutput(
            loss=loss,
            logits=estimate,
        )