from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
import numpy as np


class Model:
    def __init__(self, model_weights):
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=4)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        checkpoint = torch.load(model_weights, map_location=self.device)
        state_dict = checkpoint.get("state_dict", checkpoint)

        # The fine-tuned weights are stored under a "model." prefix (typical of
        # PyTorch Lightning checkpoints where the Hugging Face model is an
        # attribute named `model`), so strip the prefix before loading.
        filtered_state_dict = {
            k[len("model."):]: v
            for k, v in state_dict.items()
            if k.startswith("model.")
        }

        self.model.load_state_dict(filtered_state_dict, strict=False)

        self.currepoch = checkpoint.get("epoch", "N/A")
        self.loss = checkpoint.get("loss", "N/A")

        print(f"Loaded model state - Epoch: {self.currepoch}, Loss: {self.loss}")

        self.model.to(self.device)
        self.model.eval()

        self.labels = ["Blocker", "Critical", "Major", "Minor"]

    def predict(self, text):
        # Tokenize a single text, move tensors to the model's device, and run
        # a forward pass without tracking gradients.
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)

        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=-1)
        predicted_label = self.labels[torch.argmax(probs, dim=-1).item()]
        return probs[0].tolist(), predicted_label


# Module-level singleton so the checkpoint is loaded only once per process.
model_instance = None
model_weights = "assets/roberta-priority-epoch=06-val_f1=0.72.ckpt"


def get_model():
    global model_instance
    if model_instance is None:
        model_instance = Model(model_weights)
    return model_instance
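

# Usage sketch (not part of the original module): shows how the lazy singleton
# and predict() are expected to be called together. The sample bug-report text
# below is made up for illustration, and this assumes the checkpoint path in
# model_weights exists on disk.
if __name__ == "__main__":
    model = get_model()
    sample_text = "App crashes on startup when the network is unavailable"  # hypothetical input
    probabilities, label = model.predict(sample_text)
    print(f"Predicted priority: {label}")
    print(f"Class probabilities ({model.labels}): {probabilities}")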