'''
# One-time script used to upload the fine-tuned model to the Hugging Face Hub.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = torch.load('text_summary_4sets_2_550.pth', map_location=torch.device('mps'))

model.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
tokenizer.push_to_hub(repo_id="Lin0He/text-summary-gpt2-short")
'''

import torch
import numpy as np
from typing import Dict, Any
from transformers import AutoModelForCausalLM, AutoTokenizer

# Run inference on GPU when available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def topk(probs, n=9):
    """Sample one token id from the top-n entries of a logits vector."""
    # Turn raw logits into a probability distribution.
    probs = torch.softmax(probs, dim=-1)

    # Keep the n most likely tokens and renormalize their probabilities.
    tokensProb, topIx = torch.topk(probs, k=n)
    tokensProb = tokensProb / torch.sum(tokensProb)
    tokensProb = tokensProb.cpu().detach().numpy()

    # Draw one of the n candidates according to that distribution.
    choice = np.random.choice(n, 1, p=tokensProb)
    tokenId = topIx[choice][0]
    return int(tokenId)
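
# Doctest-style illustration (a sketch only, not executed at import time: the
# logits below are random stand-ins for model output; 50257 is the GPT-2 vocab size):
# >>> token_id = topk(torch.randn(50257))  # one id sampled from the 9 likeliest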


def model_infer(model, tokenizer, review, max_length=300):
    # Encode the prompt once; every candidate starts from a fresh copy.
    review_encoded = tokenizer.encode(review)

    result_text = []
    for _ in range(6):  # generate six candidate summaries
        result = review_encoded.copy()
        initial_input = torch.tensor(result).unsqueeze(0).to(device)

        # Sample the first new token from the prompt alone.
        with torch.no_grad():
            output = model(initial_input)
        logits = output.logits[0, -1]
        result.append(topk(logits))

        # Grow the sequence one sampled token at a time.
        for _ in range(max_length):
            input_ids = torch.tensor(result).unsqueeze(0).to(device)
            with torch.no_grad():
                output = model(input_ids)
            logits = output.logits[0, -1]
            res_id = topk(logits)

            # Stop as soon as the model emits its end-of-sequence token.
            if res_id == tokenizer.eos_token_id:
                return tokenizer.decode(result)
            result.append(res_id)

        result_text.append(tokenizer.decode(result))

    # Return the candidate with median-ish length (4th shortest of six).
    return sorted(result_text, key=len)[3]


class EndpointHandler:
    def __init__(self, path=""):
        # Load the fine-tuned summarizer and move it to the inference device.
        self.tokenizer = AutoTokenizer.from_pretrained("Lin0He/text-summary-gpt2-short")
        self.model = AutoModelForCausalLM.from_pretrained("Lin0He/text-summary-gpt2-short")
        self.model.to(device)
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> str:
        inputs = data.pop("inputs", data)

        # The model was fine-tuned with "TL;DR" separating the text from its summary.
        prediction = model_infer(self.model, self.tokenizer, inputs + "TL;DR")
        return prediction
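

# Local smoke test: a minimal sketch, not part of the endpoint contract. The
# sample review is made up, and the first run downloads the model from the Hub.
if __name__ == "__main__":
    handler = EndpointHandler()
    sample = {"inputs": "The product arrived quickly and works exactly as "
                        "described. Battery life is great and setup took "
                        "only a few minutes."}
    print(handler(sample))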