Amitontheweb committed (verified)
Commit: c41038b · 1 Parent(s): 93ae7ac

Upload app.py

Files changed (1)
  1. app.py +27 -11
app.py CHANGED
@@ -2,6 +2,7 @@ import numpy as np
 import pandas as pd
 import re
 import torch
+import gradio as gr
 
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
@@ -14,7 +15,7 @@ model_gen_title = AutoModelForSeq2SeqLM.from_pretrained("Ateeqq/news-title-gener
 def generate_title(input_text): #Generate a title for input text with Ateeq model
 
     input_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt") #Tokenize input text
-    input_ids = input_ids.to('cuda') #Send tokenized inputs to gpu
+    #input_ids = input_ids.to('cuda') #Send tokenized inputs to gpu
     output = model_gen_title.generate(input_ids,
         max_new_tokens=100,
         do_sample=True,
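
This hunk comments out the hard-coded .to('cuda') call so the title model runs on CPU. A minimal device-agnostic sketch, not part of this commit: it assumes the tokenizer_gen_title and model_gen_title objects loaded earlier in app.py, and the decode line is illustrative only.

import torch

# Sketch only: use a GPU when one is available, otherwise stay on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_gen_title = model_gen_title.to(device)  # assumes the model object loaded earlier in app.py

def generate_title(input_text):
    input_ids = tokenizer_gen_title.encode(input_text, return_tensors="pt").to(device)
    output = model_gen_title.generate(input_ids, max_new_tokens=100, do_sample=True)
    return tokenizer_gen_title.decode(output[0], skip_special_tokens=True)  # illustrative decode step
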
@@ -34,18 +35,19 @@ def split_into_sentences(paragraph): #For paraphraser - return a list of sentenc
     return sentences
 
 def paraphrase(
-    question,
+    text,
+    beam_search,
     #num_beams=10,
     #num_beam_groups=10,
     #num_return_sequences=1,
     #repetition_penalty=1.0,
     #diversity_penalty=1.0,
-    no_repeat_ngram_size=3,
+    #no_repeat_ngram_size=3,
     temperature=0.8,
     max_length=128
     ):
-
-    sentence_list = split_into_sentences(question) #feed input para into sentence splitter
+    if text != "":
+        sentence_list = split_into_sentences(text) #feed input para into sentence splitter
     output = [] #List to hold the individual rephrased sentences obtained from the model
 
     for sentence in sentence_list:
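
split_into_sentences() itself is unchanged by this commit and its body lies outside these hunks. Purely as an illustration of the kind of regex-based splitter the comment describes, not the repository's actual implementation:

import re

def split_into_sentences(paragraph):
    # Illustrative only: break on ., ! or ? followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", paragraph.strip())
    return [s for s in sentences if s]
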
@@ -60,23 +62,37 @@ def paraphrase(
         outputs = model.generate(
             input_ids,
             do_sample=True,
+            num_beams = 20 if beam_search else 1,
             temperature=temperature,
             max_length=max_length,
-            no_repeat_ngram_size=no_repeat_ngram_size
+            no_repeat_ngram_size=4
         )
 
         res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
         output.append(res[0]) #Add rephrased sentence to list
 
     paraphrased_text = "" #to hold the combined sentence output made from generated list
+    titles_list = "" #to hold the three titles
 
     for sentence in output: #Join all new reworded sentences together
-        paraphrased_text += sentence + " "
-
+        paraphrased_text += sentence + " "
+
     for title in range (1,4): #Print 3 titles by calling Ateeq model fn - generate_title
-        print (f"Title {title}: {generate_title (paraphrased_text)}")
-        print ("")
+
+        titles_list += (f"Title {title}: {generate_title (paraphrased_text)}<br>")
+        #titles_list.append ("") #space after each title
 
-    return paraphrased_text # Return paraphrased text after printing three titles above
+
+    return (titles_list, paraphrased_text) # Return paraphrased text after printing three titles above
+
 
+iface = gr.Interface(fn=paraphrase,
+    inputs=[gr.Textbox(label="Paste text in the input box and press 'Submit'.", lines=10), "checkbox", gr.Slider(0.1, 2, 0.8)],
+    outputs=[gr.HTML(label="Titles:"), gr.Textbox(label="Rephrased text:", lines=15)],
+    title="AI Paraphraser with Title Generator",
+    description="Sentencet-to-sentence rewording backed with GPT-3.5 training set",
+    article="<div align=left><h1>AI Paraphraser and Title Generator</h1><li>Each sentence is rephrased separately without context.</li><li>Temperature: Increase value for more creative rewordings. Higher values may corrupt the sentence. Reset value after pressing 'Clear'</li><li>Beam search: Try for safer and conservative rephrasing.</li><p>Models:<br><li>Training set derived by using Chat-GPT3.5. No competition intended.</li><li>Original models: humarin/chatgpt_paraphraser_on_T5_base and Ateeq_news_title_generator. Deployment code modified for long text inputs.</li></p><p>Parameter details:<br><li>For rephraser: Beam search: No. of beams = 20, no_repeat_ngram_size=4, do_sample=True.</li><li>For title generator: do_sample=True, temperature=0.8, top_k = 20 </li></div>",
+    flagging_mode='never'
+    )
 
+iface.launch()
 
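
With the new signature, the Gradio Textbox feeds text, the checkbox feeds beam_search, and the slider feeds temperature; the returned tuple fills the HTML titles output and the rephrased-text box. A hypothetical direct call outside the UI (sample text invented here) would look like:

titles_html, rewritten = paraphrase(
    "Paste a paragraph here. Each sentence is reworded on its own.",
    beam_search=True,   # becomes num_beams = 20 inside model.generate
    temperature=0.8,    # same default as the gr.Slider
)
print(titles_html)  # e.g. "Title 1: ...<br>Title 2: ...<br>Title 3: ...<br>"
print(rewritten)    # the rejoined paraphrased sentences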