import torch
import numpy as np
from typing import Dict, List, Any
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Run inference on the GPU when available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def topk(probs, n=9):
    # The scores are initially softmaxed to convert them to probabilities
    probs = torch.softmax(probs, dim=-1)
    # PyTorch has its own top-k method, which we use here
    tokensProb, topIx = torch.topk(probs, k=n)
    # The new selection pool (n choices) is renormalized to sum to 1
    tokensProb = tokensProb / torch.sum(tokensProb)
    # Send to CPU for numpy handling
    tokensProb = tokensProb.cpu().detach().numpy()
    # Make a random choice from the pool based on the new probability distribution
    choice = np.random.choice(n, 1, p=tokensProb)
    tokenId = topIx[choice][0]
    return int(tokenId)


def model_infer(model, tokenizer, review, max_length=10):
    # Generate six candidate continuations and return one of them
    result_text = []
    for i in range(6):
        # Preprocess the prompt (input text plus task designator)
        review_encoded = tokenizer.encode(review)
        result = review_encoded
        initial_input = torch.tensor(review_encoded).unsqueeze(0).to(device)

        with torch.set_grad_enabled(False):
            # Feed the prompt to the model
            output = model(initial_input)

            # Flatten the logits at the final time step
            logits = output.logits[0, -1]

            # Make a top-k choice and append it to the result
            choices = topk(logits)
            result.append(choices)

            # Generate up to max_length further tokens
            for _ in range(max_length):
                # Feed the current sequence to the model and make a choice
                input_ids = torch.tensor(result).unsqueeze(0).to(device)
                output = model(input_ids)
                logits = output.logits[0, -1]
                res_id = topk(logits)

                # If the chosen token is EOS, store the decoded continuation
                if res_id == tokenizer.eos_token_id:
                    result_text.append(tokenizer.decode(result)[len(review):])
                    break
                else:  # Otherwise append the token to the sequence
                    result.append(res_id)
            else:
                # If no EOS was generated, store the result after max_length steps
                result_text.append(tokenizer.decode(result)[len(review):])

    # Of the six candidates, return the second-longest (index 4 when sorted by length)
    return sorted(result_text, key=len)[4]


class PreTrainedPipeline():
    def __init__(self, path=""):
        # Load the model and tokenizer; move the model to the same device as the inputs
        self.tokenizer = AutoTokenizer.from_pretrained("Lin0He/text-summary-gpt2-short")
        self.model = AutoModelForCausalLM.from_pretrained("Lin0He/text-summary-gpt2-short").to(device)

    def __call__(self, data) -> Dict[str, str]:
        # Process the input text (a dict payload would need data.pop("inputs", data))
        inputs = data
        # Append the task designator and generate the summary;
        # model_infer already strips the prompt from the decoded output
        prediction = model_infer(self.model, self.tokenizer, inputs + "TL;DR")
        return {"text": prediction}