Upload app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import numpy as np
|
|
2 |
import pandas as pd
|
3 |
import re
|
4 |
import torch
|
|
|
5 |
|
6 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
7 |
|
@@ -14,7 +15,7 @@ model_gen_title = AutoModelForSeq2SeqLM.from_pretrained("Ateeqq/news-title-gener
|
|
14 |
def generate_title(input_text): #Generate a title for input text with Ateeq model
|
15 |
|
16 |
input_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt") #Tokenize input text
|
17 |
-
input_ids = input_ids.to('cuda') #Send tokenized inputs to gpu
|
18 |
output = model_gen_title.generate(input_ids,
|
19 |
max_new_tokens=100,
|
20 |
do_sample=True,
|
@@ -34,18 +35,19 @@ def split_into_sentences(paragraph): #For paraphraser - return a list of sentenc
|
|
34 |
return sentences
|
35 |
|
36 |
def paraphrase(
|
37 |
-
|
|
|
38 |
#num_beams=10,
|
39 |
#num_beam_groups=10,
|
40 |
#num_return_sequences=1,
|
41 |
#repetition_penalty=1.0,
|
42 |
#diversity_penalty=1.0,
|
43 |
-
no_repeat_ngram_size=3,
|
44 |
temperature=0.8,
|
45 |
max_length=128
|
46 |
):
|
47 |
-
|
48 |
-
sentence_list = split_into_sentences(
|
49 |
output = [] #List to hold the individual rephrased sentences obtained from the model
|
50 |
|
51 |
for sentence in sentence_list:
|
@@ -60,23 +62,37 @@ def paraphrase(
|
|
60 |
outputs = model.generate(
|
61 |
input_ids,
|
62 |
do_sample=True,
|
|
|
63 |
temperature=temperature,
|
64 |
max_length=max_length,
|
65 |
-
no_repeat_ngram_size=
|
66 |
)
|
67 |
|
68 |
res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
69 |
output.append(res[0]) #Add rephrased sentence to list
|
70 |
|
71 |
paraphrased_text = "" #to hold the combined sentence output made from generated list
|
|
|
72 |
|
73 |
for sentence in output: #Join all new reworded sentences together
|
74 |
-
paraphrased_text += sentence + " "
|
75 |
-
|
76 |
for title in range (1,4): #Print 3 titles by calling Ateeq model fn - generate_title
|
77 |
-
|
78 |
-
|
|
|
79 |
|
80 |
-
|
|
|
|
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
|
|
|
2 |
import pandas as pd
|
3 |
import re
|
4 |
import torch
|
5 |
+
import gradio as gr
|
6 |
|
7 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
8 |
|
|
|
15 |
def generate_title(input_text): #Generate a title for input text with Ateeq model
|
16 |
|
17 |
input_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt") #Tokenize input text
|
18 |
+
#input_ids = input_ids.to('cuda') #Send tokenized inputs to gpu
|
19 |
output = model_gen_title.generate(input_ids,
|
20 |
max_new_tokens=100,
|
21 |
do_sample=True,
|
|
|
35 |
return sentences
|
36 |
|
37 |
def paraphrase(
|
38 |
+
text,
|
39 |
+
beam_search,
|
40 |
#num_beams=10,
|
41 |
#num_beam_groups=10,
|
42 |
#num_return_sequences=1,
|
43 |
#repetition_penalty=1.0,
|
44 |
#diversity_penalty=1.0,
|
45 |
+
#no_repeat_ngram_size=3,
|
46 |
temperature=0.8,
|
47 |
max_length=128
|
48 |
):
|
49 |
+
if text != "":
|
50 |
+
sentence_list = split_into_sentences(text) #feed input para into sentence splitter
|
51 |
output = [] #List to hold the individual rephrased sentences obtained from the model
|
52 |
|
53 |
for sentence in sentence_list:
|
|
|
62 |
outputs = model.generate(
|
63 |
input_ids,
|
64 |
do_sample=True,
|
65 |
+
num_beams = 20 if beam_search else 1,
|
66 |
temperature=temperature,
|
67 |
max_length=max_length,
|
68 |
+
no_repeat_ngram_size=4
|
69 |
)
|
70 |
|
71 |
res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
72 |
output.append(res[0]) #Add rephrased sentence to list
|
73 |
|
74 |
paraphrased_text = "" #to hold the combined sentence output made from generated list
|
75 |
+
titles_list = "" #to hold the three titles
|
76 |
|
77 |
for sentence in output: #Join all new reworded sentences together
|
78 |
+
paraphrased_text += sentence + " "
|
79 |
+
|
80 |
for title in range (1,4): #Print 3 titles by calling Ateeq model fn - generate_title
|
81 |
+
|
82 |
+
titles_list += (f"Title {title}: {generate_title (paraphrased_text)}<br>")
|
83 |
+
#titles_list.append ("") #space after each title
|
84 |
|
85 |
+
|
86 |
+
return (titles_list, paraphrased_text) # Return paraphrased text after printing three titles above
|
87 |
+
|
88 |
|
89 |
+
# Gradio UI wiring: exposes paraphrase() as a web app.
# Inputs map positionally onto paraphrase(text, beam_search, temperature):
#   Textbox  -> text         (paragraph to reword)
#   Checkbox -> beam_search  (True = 20-beam search, False = pure sampling)
#   Slider   -> temperature  (0.1-2, default 0.8; higher = more creative)
# Outputs map positionally onto paraphrase()'s (titles_list, paraphrased_text)
# return tuple: HTML pane for the three generated titles, Textbox for the
# reworded paragraph.
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.Textbox(label="Paste text in the input box and press 'Submit'.", lines=10),
        "checkbox",
        # Label added so users know what the bare 0.1-2 slider controls.
        gr.Slider(0.1, 2, 0.8, label="Temperature"),
    ],
    outputs=[
        gr.HTML(label="Titles:"),
        gr.Textbox(label="Rephrased text:", lines=15),
    ],
    title="AI Paraphraser with Title Generator",
    # Typo fixed: "Sentencet-to-sentence" -> "Sentence-to-sentence".
    description="Sentence-to-sentence rewording backed with GPT-3.5 training set",
    article="<div align=left><h1>AI Paraphraser and Title Generator</h1><li>Each sentence is rephrased separately without context.</li><li>Temperature: Increase value for more creative rewordings. Higher values may corrupt the sentence. Reset value after pressing 'Clear'</li><li>Beam search: Try for safer and conservative rephrasing.</li><p>Models:<br><li>Training set derived by using Chat-GPT3.5. No competition intended.</li><li>Original models: humarin/chatgpt_paraphraser_on_T5_base and Ateeq_news_title_generator. Deployment code modified for long text inputs.</li></p><p>Parameter details:<br><li>For rephraser: Beam search: No. of beams = 20, no_repeat_ngram_size=4, do_sample=True.</li><li>For title generator: do_sample=True, temperature=0.8, top_k = 20 </li></div>",
    # 'never' disables the flag button entirely (gradio >= 4.x name for
    # the old allow_flagging='never').
    flagging_mode="never",
)

# Start the local Gradio server (blocking call).
iface.launch()