'''
# upload model
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = torch.load('text_summary_4sets_2_550.pth', map_location=torch.device('mps'))
model.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
tokenizer.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
'''
import torch
import numpy as np
from typing import Dict, List, Any
from transformers import AutoModelForCausalLM, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'


def topk(probs, n=9):
    # The scores are initially softmaxed to convert to probabilities
    probs = torch.softmax(probs, dim=-1)

    # PyTorch has its own topk method, which we use here
    tokensProb, topIx = torch.topk(probs, k=n)

    # The new selection pool (n choices) is renormalized
    tokensProb = tokensProb / torch.sum(tokensProb)

    # Send to CPU for numpy handling
    tokensProb = tokensProb.cpu().detach().numpy()

    # Make a random choice from the pool based on the new prob distribution
    choice = np.random.choice(n, 1, p=tokensProb)
    tokenId = topIx[choice][0]
    return int(tokenId)


def model_infer(model, tokenizer, review, max_length=300):
    result_text = []
    for i in range(6):
        # Preprocess the init token (task designator)
        review_encoded = tokenizer.encode(review)
        result = review_encoded
        initial_input = torch.tensor(review_encoded).unsqueeze(0).to(device)

        with torch.set_grad_enabled(False):
            # Feed the init token to the model
            output = model(initial_input)

            # Flatten the logits at the final time step
            logits = output.logits[0, -1]

            # Make a top-k choice and append to the result
            choices = topk(logits)
            result.append(choices)

            # For max_length times:
            for _ in range(max_length):
                # Feed the current sequence to the model and make a choice
                input = torch.tensor(result).unsqueeze(0).to(device)
                output = model(input)
                logits = output.logits[0, -1]
                res_id = topk(logits)

                # If the chosen token is EOS, return the result
                if res_id == tokenizer.eos_token_id:
                    return tokenizer.decode(result)
                else:  # Append to the sequence
                    result.append(res_id)

        # If no EOS is generated, keep the candidate reached at max_length
        result_text.append(tokenizer.decode(result))

    # Return a mid-length candidate among the six generated
    return sorted(result_text, key=len)[3]


class EndpointHandler():
    def __init__(self, path=""):
        # Load model and tokenizer from the Hub
        self.tokenizer = AutoTokenizer.from_pretrained("Lin0He/text-summary-gpt2-short")
        self.model = AutoModelForCausalLM.from_pretrained("Lin0He/text-summary-gpt2-short")
        # Move the model to the same device the inference inputs are sent to
        self.model.to(device)
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> str:
        # Process input
        inputs = data.pop("inputs", data)
        # Append the task designator and generate the summary
        prediction = model_infer(self.model, self.tokenizer, inputs + "TL;DR")
        return prediction
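

# --- Optional local smoke test (not part of the endpoint contract) ---
# A minimal sketch of how the handler above could be exercised locally,
# assuming the "Lin0He/text-summary-gpt2-short" weights are reachable.
# The payload shape mirrors the Inference Endpoints convention of a dict
# carrying an "inputs" field; the sample text is illustrative only.
if __name__ == "__main__":
    handler = EndpointHandler()
    sample = {
        "inputs": "The city council voted on Tuesday to expand the bike lane "
                  "network, citing a sharp rise in commuter cycling over the "
                  "past two years and pressure from local residents."
    }
    print(handler(sample))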