Dionyssos's picture
fx sounds batch inference
86b9ce4
import warnings
from transformers import T5EncoderModel, T5Tokenizer # type: ignore
import torch
from torch import nn
class T5Conditioner(nn.Module):
MODELS = ["t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b",
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large",
"google/flan-t5-xl", "google/flan-t5-xxl"]
MODELS_DIMS = {
"t5-small": 512,
"t5-base": 768,
"t5-large": 1024,
"t5-3b": 1024,
"t5-11b": 1024,
"google/flan-t5-small": 512,
"google/flan-t5-base": 768,
"google/flan-t5-large": 1024,
"google/flan-t5-3b": 1024,
"google/flan-t5-11b": 1024,
}
def __init__(self,
name,
output_dim,
device,
finetune=False):
print(f'{finetune=}')
assert name in self.MODELS, f"Unrecognized t5 model name (should in {self.MODELS})"
super().__init__()
self.dim = self.MODELS_DIMS[name]
self.output_dim = output_dim
self.output_proj = nn.Linear(self.dim, output_dim)
self.device = device
self.name = name
self.t5_tokenizer = T5Tokenizer.from_pretrained(name)
t5 = T5EncoderModel.from_pretrained(name).eval() #.train(mode=finetune)
if finetune:
self.t5 = t5
else:
# this makes sure that the t5 models is not part
# of the saved checkpoint
self.__dict__['t5'] = t5.to(device)
def tokenize(self, x):
entries = [xi if xi is not None else "" for xi in x]
inputs = self.t5_tokenizer(entries,
return_tensors='pt',
padding=True).to(self.device)
return inputs # 'input_ids' 'attentio mask'
def forward(self, descriptions):
d = self.tokenize(descriptions)
with torch.no_grad():
embeds = self.t5(input_ids=d['input_ids'],
attention_mask=d['attention_mask']
).last_hidden_state # no kvcache for txt conditioning
embeds = self.output_proj(embeds.to(self.output_proj.weight))
embeds = (embeds * d['attention_mask'].unsqueeze(-1))
return embeds # , d['attention_mask']