|
|
|
"""Ai Re-Phraser.py |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf |
|
|
|
""" |
|
|
|
|
|
import os |
|
import pandas as pd |
|
from parrot import Parrot |
|
import torch |
|
import warnings |
|
import nltk |
|
import warnings |
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
from sentence_splitter import SentenceSplitter, split_text_into_sentences |
|
splitter = SentenceSplitter(language='en') |
|
from parrot import Parrot |
|
from transformers import PegasusForConditionalGeneration, PegasusTokenizer |
|
from transformers import AutoTokenizer |
|
from transformers import AutoModelForSeq2SeqLM |
|
from parrot.filters import Adequacy |
|
from parrot.filters import Fluency |
|
from parrot.filters import Diversity |
|
|
|
|
|
|
|
# Parrot filter/ranker objects applied to the Pegasus candidates in
# get_response().
adequacy_score = Adequacy()
fluency_score = Fluency()
diversity_score = Diversity()

# Device handed to the adequacy and fluency filters.  Fall back to the CPU when
# no GPU is available instead of unconditionally requesting "cuda:0", which
# makes the filters fail on CPU-only hosts.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Thresholds passed to the adequacy and fluency filters respectively.
adequacy_threshold = 0.99
fluency_threshold = 0.90

# Ranker name passed to diversity_score.rank().
diversity_ranker = "levenshtein"

# NOTE(review): not referenced in the visible part of this file; kept in case
# other code reads it.
do_diverse = True
|
|
|
|
|
|
|
# Checkpoint used for paraphrase generation.
model_name = 'tuner007/pegasus_paraphrase'

# Run Pegasus on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

# Load the tokenizer and the model, then move the model to the chosen device.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name)
model_pegasus = model_pegasus.to(torch_device)
|
|
|
def get_max_str(lst):
    """Return the longest item of ``lst``.

    Length is measured with ``len``; when several items share the maximum
    length the first one encountered is returned.  An empty ``lst`` raises
    ``ValueError`` (propagated from ``max``).
    """
    longest = max(lst, key=len)
    return longest
|
def get_response(input_text):
    """Paraphrase one sentence with Pegasus and return a single candidate.

    Fifteen beam-search candidates are generated and decoded, then narrowed
    by the adequacy filter and the fluency filter and finally handed to the
    diversity ranker.  The longest string of the last stage that still has
    candidates is returned:

    * no candidate passes the adequacy filter -> longest raw candidate;
    * no candidate passes the fluency filter  -> longest adequate candidate;
    * otherwise                               -> longest ranked candidate.

    If any filter or the ranker raises, the error is logged and the longest
    raw candidate is returned (best-effort behaviour).

    Args:
        input_text: Sentence to paraphrase.

    Returns:
        The selected paraphrase.  Blank input (empty or whitespace-only) is
        returned unchanged without running the model.
    """
    # Local import: the file's import block does not provide ``logging``.
    import logging

    # Nothing to paraphrase; avoid generating text from an empty prompt.
    if not input_text or not input_text.strip():
        return input_text

    # Calling the tokenizer directly replaces the deprecated
    # ``prepare_seq2seq_batch`` helper with the same arguments.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors='pt',
    ).to(torch_device)

    # NOTE(review): ``temperature`` is passed without sampling enabled; its
    # effect on beam search depends on the installed transformers version.
    translated = model_pegasus.generate(
        **batch,
        max_length=60,
        num_beams=15,
        num_return_sequences=15,
        temperature=1.5,
    )
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)

    try:
        adequacy_filtered = adequacy_score.filter(
            input_text, tgt_text, adequacy_threshold, device
        )
        if not adequacy_filtered:
            # Explicit fallback instead of relying on max() raising on [].
            return get_max_str(tgt_text)

        fluency_filtered = fluency_score.filter(
            adequacy_filtered, fluency_threshold, device
        )
        if not fluency_filtered:
            # Keep the candidates that at least passed the adequacy filter.
            return get_max_str(adequacy_filtered)

        diversity_scored = diversity_score.rank(
            input_text, fluency_filtered, diversity_ranker
        )
        # presumably rank() returns a mapping keyed by phrase, so iterating it
        # yields the phrases themselves -- TODO confirm against Parrot.
        return get_max_str(diversity_scored)
    except Exception:
        # Best-effort: a failing filter must not break paraphrasing, but the
        # failure should be visible rather than silently swallowed.
        logging.getLogger(__name__).exception(
            "Paraphrase filtering failed; using the longest raw candidate"
        )
        return get_max_str(tgt_text)
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
def get_fun(Input_txt):
    """Paraphrase a block of text sentence by sentence.

    The text is split into sentences with the module-level ``splitter``,
    each non-blank sentence is paraphrased with ``get_response`` and the
    results are joined with single spaces.

    Args:
        Input_txt: Text entered by the user.

    Returns:
        The paraphrased text without leading or trailing separator spaces,
        or an empty string when the input is empty, whitespace-only or None.
    """
    # Nothing to paraphrase: skip the splitter and the model entirely.
    if not Input_txt or not Input_txt.strip():
        return ''

    sentences = splitter.split(text=Input_txt)

    # Blank entries are skipped so the model is never run on an empty prompt.
    rephrased = [
        get_response(sentence) for sentence in sentences if sentence.strip()
    ]

    # join() avoids the leading space produced by repeated ``' ' + piece``.
    return ' '.join(rephrased)
|
|
|
# Text-in / text-out Gradio UI around get_fun.
iface = gr.Interface(
    fn=get_fun,
    inputs="text",
    outputs="text",
    title=" Ai Re-Phraser Q'Hackday",
)

# Start the app; inline=False asks Gradio not to embed the UI inline.
iface.launch(inline=False)
|
|
|
"""# New Section""" |