import numpy as np import pandas as pd import re import torch from transformers import AutoTokenizer, AutoModelForSeq2SeqLM tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base") tokenizer_gen_title = AutoTokenizer.from_pretrained("Ateeqq/news-title-generator") model_gen_title = AutoModelForSeq2SeqLM.from_pretrained("Ateeqq/news-title-generator") def generate_title(input_text): #Generate a title for input text with Ateeq model input_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt") #Tokenize input text input_ids = input_ids.to('cuda') #Send tokenized inputs to gpu output = model_gen_title.generate(input_ids, max_new_tokens=100, do_sample=True, temperature=0.8, top_k = 20 ) decoded_text = tokenizer_gen_title.decode(output[0], skip_special_tokens=True) return decoded_text def split_into_sentences(paragraph): #For paraphraser - return a list of sentences from input para # Split sentences after period. Retains \n if part of the text, but not included in model output sentence_endings = r'(?