# (Colab/export metadata stripped: file-size header, revision hashes, column index)
# -*- coding: utf-8 -*-
"""Ai Re-Phraser.py
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf
"""
# importing the libraries
import os
import pandas as pd
from parrot import Parrot #Parrot offers knobs to control Adequacy, Fluency and Diversity metrics
import torch
import warnings
import nltk
import warnings
warnings.filterwarnings("ignore")
from sentence_splitter import SentenceSplitter, split_text_into_sentences
splitter = SentenceSplitter(language='en')
from parrot import Parrot
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
from parrot.filters import Adequacy
from parrot.filters import Fluency
from parrot.filters import Diversity
# --- Paraphrase-quality scorers -------------------------------------------
adequacy_score = Adequacy()     # does the paraphrase preserve the meaning?
fluency_score = Fluency()       # is the paraphrase fluent English?
diversity_score = Diversity()   # how much did the sentence change?

# Resolve the compute device ONCE and reuse it everywhere.  The original
# pinned the filter device to "cuda:0" unconditionally, which crashes on
# CPU-only machines even though torch_device already falls back correctly.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch_device

# Filter thresholds / ranking knobs consumed by get_response().
adequacy_threshold = 0.99       # minimum semantic-preservation score
fluency_threshold = 0.90        # minimum grammaticality score
diversity_ranker = "levenshtein"
do_diverse = True               # request diverse beam groups when supported
# num_beam_groups (int) — number of groups num_beams is divided into to
# ensure diversity among different groups of beams.

# Pegasus checkpoint fine-tuned for sentence-level paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def get_max_str(lst):
    """Return the longest string in *lst*; on a length tie the earliest wins."""
    return max(lst, key=lambda candidate: len(candidate))
def get_response(input_text):
    """Paraphrase one sentence and return the best surviving candidate.

    Generates 15 beam candidates with Pegasus, then filters them through
    adequacy -> fluency -> diversity; the longest survivor is returned.
    Each stage falls back to the previous stage's candidates when it
    filters everything out (the original returned get_max_str([]) in those
    branches — a guaranteed ValueError that was silently rescued by a bare
    except).  Any filter failure falls back to the longest raw candidate.
    """
    batch = tokenizer.prepare_seq2seq_batch(
        [input_text], truncation=True, padding='longest',
        max_length=60, return_tensors='pt').to(torch_device)
    translated = model_pegasus.generate(
        **batch, max_length=60, num_beams=15,
        num_return_sequences=15, temperature=1.5)
    # num_beam_groups=num_beams, diversity_penalty=0.5
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    try:
        adequacy_filtered = adequacy_score.filter(
            input_text, tgt_text, adequacy_threshold, device)
        if not adequacy_filtered:
            # Nothing kept its meaning well enough: use the raw candidates.
            return get_max_str(tgt_text)
        fluency_filtered = fluency_score.filter(
            adequacy_filtered, fluency_threshold, device)
        if not fluency_filtered:
            # Nothing fluent enough: keep the adequacy-filtered set.
            return get_max_str(adequacy_filtered)
        diversity_scored = diversity_score.rank(
            input_text, fluency_filtered, diversity_ranker)
        return get_max_str(diversity_scored)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any scorer failure degrades to the raw beams.
        return get_max_str(tgt_text)
# Deploying the model
import gradio as gr
def get_fun(Input_txt):
    """Paraphrase free-form text sentence by sentence.

    Splits *Input_txt* into sentences, paraphrases each via get_response(),
    and rejoins them with single spaces.  Uses str.join instead of the
    original quadratic `+=` concatenation, which also drops the spurious
    leading space the original prepended to every result.
    """
    sentences = splitter.split(text=Input_txt)
    return ' '.join(get_response(sentence) for sentence in sentences)
# Wire the paraphraser into a minimal Gradio text-in / text-out web UI.
iface = gr.Interface(fn=get_fun, inputs="text", outputs="text", title = " Ai Re-Phraser Q'Hackday")
# inline=False serves the app at a separate URL instead of embedding it in the notebook.
iface.launch(inline=False)
"""# New Section""" |