'''
# Upload the fine-tuned model and tokenizer to the Hugging Face Hub.
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = torch.load('text_summary_4sets_2_550.pth', map_location=torch.device('mps'))
# push_to_hub takes the target repo as repo_id; it has no repo_name argument.
model.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
tokenizer.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
'''

import torch
import numpy as np
from typing import Dict, Any
from transformers import AutoModelForCausalLM, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'


def topk(logits, n=9):
    # Softmax the raw scores to convert them to probabilities.
    probs = torch.softmax(logits, dim=-1)
    # Keep the n most likely tokens via PyTorch's built-in top-k.
    tokensProb, topIx = torch.topk(probs, k=n)
    # Renormalize the selection pool (n choices) so it sums to 1.
    tokensProb = tokensProb / torch.sum(tokensProb)
    # Move to CPU for numpy handling.
    tokensProb = tokensProb.cpu().detach().numpy()
    # Sample one token from the pool according to the renormalized distribution.
    choice = np.random.choice(n, 1, p=tokensProb)
    tokenId = topIx[choice][0]
    return int(tokenId)


def model_infer(model, tokenizer, review, max_length=300):
    result_text = []
    for _ in range(6):
        # Encode the prompt (input text plus task designator); copy it so
        # appending sampled tokens does not mutate the encoded prompt.
        review_encoded = tokenizer.encode(review)
        result = list(review_encoded)
        initial_input = torch.tensor(review_encoded).unsqueeze(0).to(device)

        # Disable gradient tracking for the whole generation loop, not just
        # the first forward pass.
        with torch.no_grad():
            # Feed the prompt to the model.
            output = model(initial_input)
            # Take the logits at the final time step.
            logits = output.logits[0, -1]
            # Make a top-k choice and append it to the result.
            result.append(topk(logits))

            # Generate up to max_length further tokens.
            for _ in range(max_length):
                # Feed the current sequence to the model and sample the next token.
                input_ids = torch.tensor(result).unsqueeze(0).to(device)
                output = model(input_ids)
                logits = output.logits[0, -1]
                res_id = topk(logits)

                # If the chosen token is EOS, return the decoded result.
                if res_id == tokenizer.eos_token_id:
                    return tokenizer.decode(result)
                # Otherwise append it to the sequence.
                result.append(res_id)

        # No EOS was generated: keep the max_length candidate.
        result_text.append(tokenizer.decode(result))

    # Of the six truncated candidates, return the one of median length.
    return sorted(result_text, key=len)[3]


class EndpointHandler():
    def __init__(self, path=""):
        # Load model and tokenizer from the Hub (the local path is unused).
        self.tokenizer = AutoTokenizer.from_pretrained("Lin0He/text-summary-gpt2-short")
        self.model = AutoModelForCausalLM.from_pretrained("Lin0He/text-summary-gpt2-short")
        # Move the model to the same device the input tensors are sent to.
        self.model.to(device)
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> str:
        # Extract the input text.
        inputs = data.pop("inputs", data)
        # Append the "TL;DR" task designator and generate a summary.
        prediction = model_infer(self.model, self.tokenizer, inputs + "TL;DR")
        return prediction
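

# A minimal local smoke test (a sketch, not part of the endpoint contract):
# it mirrors how Hugging Face Inference Endpoints invoke the handler, i.e.
# a dict carrying an "inputs" key. The sample text below is an assumption
# for illustration only.
if __name__ == "__main__":
    handler = EndpointHandler()
    payload = {"inputs": "The article describes a new approach to model "
                         "deployment that reduces cold-start latency."}
    print(handler(payload))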