Spaces:

Deep1994
/

t5-paraphrase

Runtime error

App Files Files Community

Deep1994 committed on Mar 24, 2022

Commit

cd16b80

1 Parent(s): ca02a1d

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import streamlit as st
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+import torch
+def set_seed(seed):
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+tokenizer = T5Tokenizer.from_pretrained('Deep1994/t5-paraphrase-quora')
+@st.cache(allow_output_mutation=True)
+def load_model():
+    model = T5ForConditionalGeneration.from_pretrained('Deep1994/t5-paraphrase-quora')
+    return model
+model = load_model()
+st.sidebar.subheader('Select decoding strategy below.')
+decoding_strategy = st.sidebar.selectbox("decoding_strategy", ['Top k/p sampling', 'Beam Search'])
+st.title('Paraphrase a question in English.')
+st.write('This is a fine-tuned t5 model that will paraphrase\
+         your English input text into another English output\
+         by leveraging a pre-trained [Text-To-Text Transfer Tranformers](https://arxiv.org/abs/1910.10683) model.')
+st.subheader('Input Text')
+text = st.text_area(' ', height=100)
+if text != '':
+    set_seed(1234) # for reproducibility
+    prefix = 'paraphrase: '
+    encoding = tokenizer.encode_plus(prefix + text, padding=True, return_tensors="pt")
+    input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
+    if str(decoding_strategy) == 'Top k/p sampling':
+        beam_outputs = model.generate(
+            input_ids=input_ids, attention_mask=attention_masks,
+            do_sample=True,
+            max_length=20,
+            top_k=50,
+            top_p=0.95,
+            early_stopping=True,
+            num_return_sequences=5
+        )
+    elif str(decoding_strategy) == 'Beam Search':
+        beam_outputs = model.generate(
+        input_ids=input_ids,
+        attention_mask=attention_masks,
+        max_length=20,
+        num_beams=5,
+        no_repeat_ngram_size=2,
+        num_return_sequences=5,
+        early_stopping=True
+        )
+    final_outputs =[]
+    for beam_output in beam_outputs:
+        sent = tokenizer.decode(beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+        # if sent.lower() != text.lower() and sent not in final_outputs:
+        #     final_outputs.append(sent)
+        final_outputs.append(sent)
+    st.subheader('Paraphrased Text')
+    for i, final_output in enumerate(final_outputs):
+        st.write(final_output + '\n')