# Lin0He/text-summary-gpt2-short

'''
# upload model
import torch
from transformers import GPT2LMHeadModel,GPT2Tokenizer, GPT2Config
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = torch.load('text_summary_4sets_2_550.pth', map_location=torch.device('mps'))
model.push_to_hub(repo_name="text-summary-gpt2-short", repo_id="Lin0He/text-summary-gpt2-short")
tokenizer.push_to_hub(repo_name="text-summary-gpt2-short", repo_id="Lin0He/text-summary-gpt2-short")
'''
from typing import Any, Dict, List

import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, pipeline
class PreTrainedPipeline():
    """Summarize text by prompting a causal language model with "<text>TL;DR".

    The pipeline samples several candidate continuations with top-k sampling
    and returns the one of median length, which filters out both degenerate
    (very short) and rambling (very long) samples.
    """

    # Hub repository used when no local path is supplied.
    DEFAULT_MODEL_ID = "Lin0He/text-summary-gpt2-short"
    # Marker appended to the input so the model continues with a summary.
    PROMPT_SUFFIX = "TL;DR"
    # Number of candidate summaries sampled per request.
    NUM_CANDIDATES = 6
    # Default cap on the number of newly generated tokens per candidate.
    MAX_NEW_TOKENS = 30

    def __init__(self, path=""):
        """Load the tokenizer and language model.

        Args:
            path: Local directory (or Hub id) to load from. When empty, the
                published ``DEFAULT_MODEL_ID`` repository is used.
        """
        source = path or self.DEFAULT_MODEL_ID
        self.tokenizer = AutoTokenizer.from_pretrained(source)
        # A language-modeling head is required: the bare AutoModel returns
        # hidden states only and has no ``.logits`` to sample from.
        self.model = AutoModelForCausalLM.from_pretrained(source)
        self.model.eval()

    @staticmethod
    def topk(probs, n=9):
        """Sample one token id from the ``n`` highest-scoring entries.

        Args:
            probs: 1-D tensor of unnormalized scores (logits) over the vocabulary.
            n: Size of the candidate pool; clamped to the vocabulary size.

        Returns:
            The sampled token id as a plain ``int``.

        Raises:
            ValueError: If ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("n must be at least 1")
        pool_size = min(n, probs.shape[-1])
        top_scores, top_ids = torch.topk(probs.float(), k=pool_size)
        # Softmax over the pool alone equals the full softmax renormalized
        # over the same entries, and is numerically more stable.
        weights = torch.softmax(top_scores, dim=-1)
        pick = torch.multinomial(weights, num_samples=1)
        return int(top_ids[pick].item())

    def _generate_ids(self, model, tokenizer, review, max_length):
        """Sample up to ``max_length`` new token ids following ``review``.

        Returns:
            A ``(prompt_ids, new_ids)`` pair of token-id lists. Generation
            stops early, without recording it, when EOS is sampled.
        """
        prompt_ids = list(tokenizer.encode(review))
        if not prompt_ids:
            raise ValueError("cannot generate from an empty prompt")
        # Run on whatever device the model's weights live on.
        device = next(model.parameters()).device
        sequence = list(prompt_ids)
        new_ids = []
        # NOTE: the whole sequence is re-encoded on every step (no KV cache),
        # and prompts longer than the model's context window are not truncated.
        with torch.no_grad():
            for _ in range(max_length):
                batch = torch.tensor(
                    sequence, dtype=torch.long, device=device
                ).unsqueeze(0)
                # Only the scores at the final position predict the next token.
                next_scores = model(batch).logits[0, -1]
                token_id = self.topk(next_scores)
                if token_id == tokenizer.eos_token_id:
                    break
                sequence.append(token_id)
                new_ids.append(token_id)
        return prompt_ids, new_ids

    def model_infer(self, model, tokenizer, review, max_length=MAX_NEW_TOKENS):
        """Continue ``review`` with sampled tokens and return the decoded text.

        Args:
            model: Causal language model whose output exposes ``.logits``.
            tokenizer: Tokenizer providing ``encode``, ``decode`` and
                ``eos_token_id``.
            review: Prompt text to continue.
            max_length: Maximum number of tokens to generate.

        Returns:
            The decoded prompt followed by the generated continuation.

        Raises:
            ValueError: If ``review`` encodes to no tokens.
        """
        prompt_ids, new_ids = self._generate_ids(
            model, tokenizer, review, max_length
        )
        return tokenizer.decode(prompt_ids + new_ids)

    def predict(self, text):
        """Return a summary of ``text``.

        ``NUM_CANDIDATES`` continuations of ``text + PROMPT_SUFFIX`` are
        sampled and the median-length one is returned.
        """
        prompt = text + self.PROMPT_SUFFIX
        candidates = []
        for _ in range(self.NUM_CANDIDATES):
            _, new_ids = self._generate_ids(
                self.model, self.tokenizer, prompt, self.MAX_NEW_TOKENS
            )
            # Decode only the generated ids: slicing the full decoded string
            # by prompt length is unreliable because decoding may not
            # reproduce the prompt character-for-character.
            candidates.append(self.tokenizer.decode(new_ids).strip())
        candidates.sort(key=len)
        return candidates[len(candidates) // 2]

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """Handle one inference request.

        Args:
            data: Request payload; the text is read from its ``"inputs"`` key.
                A bare string is also accepted.

        Returns:
            ``{"text": <summary>}``.

        Raises:
            TypeError: If no input string can be extracted from ``data``.
        """
        # ``get`` rather than ``pop`` so the caller's payload is not mutated.
        inputs = data.get("inputs", data) if isinstance(data, dict) else data
        if not isinstance(inputs, str):
            raise TypeError("expected a string under the 'inputs' key")
        prediction = self.predict(inputs)
        return {"text": prediction}
'''
predictor = pipeline("summarization", model = model, tokenizer = tokenizer)
result = predictor("Input text for prediction")
print(result)
'''