sashtech committed on
Commit
5da5cc3
·
verified ·
1 Parent(s): fbc26ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -30
app.py CHANGED
@@ -6,7 +6,6 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
- import random
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -134,17 +133,66 @@ def correct_article_errors(text):
134
  corrected_text.append(token.text)
135
  return ' '.join(corrected_text)
136
 
137
- # Function to dynamically choose synonyms with more options
138
- def dynamic_synonyms(token, pos):
 
 
 
 
 
 
 
 
 
 
139
  synonyms = get_synonyms_nltk(token.lemma_, pos)
140
- # Choose a random synonym to increase variety
141
  if synonyms:
142
- random_synonym = random.choice(synonyms)
143
- return random_synonym
 
 
 
 
 
 
144
  return token.text
145
 
146
- # Function to rephrase text and replace words with more versatile synonyms
147
- def versatile_rephrase(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  doc = nlp(text)
149
  rephrased_text = []
150
 
@@ -158,25 +206,30 @@ def versatile_rephrase(text):
158
  pos_tag = wordnet.ADJ
159
  elif token.pos_ == "ADV":
160
  pos_tag = wordnet.ADV
161
-
162
  if pos_tag:
163
- synonym = dynamic_synonyms(token, pos_tag)
164
- if token.pos_ == "VERB":
165
- if token.tag_ == "VBG": # Present participle (e.g., running)
166
- synonym = synonym + 'ing'
167
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
168
- synonym = synonym + 'ed'
169
- elif token.tag_ == "VBZ": # Third-person singular present
170
- synonym = synonym + 's'
171
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
172
- synonym += 's' if not synonym ends with 's' else ""
173
- rephrased_text.append(synonym)
 
 
 
 
 
174
  else:
175
  rephrased_text.append(token.text)
176
 
177
  return ' '.join(rephrased_text)
178
 
179
- # Function to retain the structure of the input text (headings, paragraphs, line breaks)
180
  def retain_structure(text):
181
  lines = text.split("\n")
182
  formatted_lines = []
@@ -190,24 +243,35 @@ def retain_structure(text):
190
  return "\n".join(formatted_lines)
191
 
192
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
193
- def paraphrase_and_correct_with_structure(text):
 
194
  structured_text = retain_structure(text)
195
 
196
- # Rephrase with more versatile synonyms while maintaining grammatical forms
197
- paraphrased_text = versatile_rephrase(structured_text)
 
 
 
198
 
199
- # Apply grammatical corrections on the rephrased text
200
- paraphrased_text = remove_redundant_words(paraphrased_text)
201
- paraphrased_text = capitalize_sentences_and_nouns(paraphrased_text)
202
  paraphrased_text = force_first_letter_capital(paraphrased_text)
 
 
203
  paraphrased_text = handle_possessives(paraphrased_text)
 
 
204
  paraphrased_text = correct_article_errors(paraphrased_text)
205
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
206
  paraphrased_text = correct_tense_errors(paraphrased_text)
207
  paraphrased_text = correct_double_negatives(paraphrased_text)
208
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
 
 
 
 
 
209
  paraphrased_text = correct_spelling(paraphrased_text)
210
-
211
  return paraphrased_text
212
 
213
  # Gradio app setup with two tabs
@@ -227,6 +291,6 @@ with gr.Blocks() as demo:
227
  result2 = gr.Textbox(lines=5, label='Corrected Text')
228
 
229
  # Connect the paraphrasing and correction function to the button
230
- button2.click(fn=paraphrase_and_correct_with_structure, inputs=t2, outputs=result2)
231
 
232
- demo.launch(share=True)
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
133
  corrected_text.append(token.text)
134
  return ' '.join(corrected_text)
135
 
136
+ # Function to get the correct synonym while maintaining verb form
137
+ def replace_with_synonym(token):
138
+ pos = None
139
+ if token.pos_ == "VERB":
140
+ pos = wordnet.VERB
141
+ elif token.pos_ == "NOUN":
142
+ pos = wordnet.NOUN
143
+ elif token.pos_ == "ADJ":
144
+ pos = wordnet.ADJ
145
+ elif token.pos_ == "ADV":
146
+ pos = wordnet.ADV
147
+
148
  synonyms = get_synonyms_nltk(token.lemma_, pos)
149
+
150
  if synonyms:
151
+ synonym = synonyms[0]
152
+ if token.tag_ == "VBG": # Present participle (e.g., running)
153
+ synonym = synonym + 'ing'
154
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
155
+ synonym = synonym + 'ed'
156
+ elif token.tag_ == "VBZ": # Third-person singular present
157
+ synonym = synonym + 's'
158
+ return synonym
159
  return token.text
160
 
161
+ # Function to check for and avoid double negatives
162
+ def correct_double_negatives(text):
163
+ doc = nlp(text)
164
+ corrected_text = []
165
+ for token in doc:
166
+ if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
167
+ corrected_text.append("always")
168
+ else:
169
+ corrected_text.append(token.text)
170
+ return ' '.join(corrected_text)
171
+
172
+ # Function to ensure subject-verb agreement
173
+ def ensure_subject_verb_agreement(text):
174
+ doc = nlp(text)
175
+ corrected_text = []
176
+ for token in doc:
177
+ if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
178
+ if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
179
+ corrected_text.append(token.head.lemma_ + "s")
180
+ elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
181
+ corrected_text.append(token.head.lemma_)
182
+ corrected_text.append(token.text)
183
+ return ' '.join(corrected_text)
184
+
185
+ # Function to correct spelling errors
186
+ def correct_spelling(text):
187
+ words = text.split()
188
+ corrected_words = []
189
+ for word in words:
190
+ corrected_word = spell.correction(word)
191
+ corrected_words.append(corrected_word)
192
+ return ' '.join(corrected_words)
193
+
194
+ # Function to rephrase text and replace words with their synonyms while maintaining form
195
+ def rephrase_with_synonyms(text):
196
  doc = nlp(text)
197
  rephrased_text = []
198
 
 
206
  pos_tag = wordnet.ADJ
207
  elif token.pos_ == "ADV":
208
  pos_tag = wordnet.ADV
209
+
210
  if pos_tag:
211
+ synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
212
+ if synonyms:
213
+ # Use a more dynamic approach for synonyms
214
+ synonym = max(synonyms, key=lambda s: wordnet.synsets(s, pos=pos_tag)) # Select based on the number of synsets
215
+ if token.pos_ == "VERB":
216
+ if token.tag_ == "VBG": # Present participle (e.g., running)
217
+ synonym = synonym + 'ing'
218
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
219
+ synonym = synonym + 'ed'
220
+ elif token.tag_ == "VBZ": # Third-person singular present
221
+ synonym = synonym + 's'
222
+ elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
223
+ synonym += 's' if not synonym.endswith('s') else ""
224
+ rephrased_text.append(synonym)
225
+ else:
226
+ rephrased_text.append(token.text)
227
  else:
228
  rephrased_text.append(token.text)
229
 
230
  return ' '.join(rephrased_text)
231
 
232
+ # Retain the structure of the input text (headings, paragraphs, line breaks)
233
  def retain_structure(text):
234
  lines = text.split("\n")
235
  formatted_lines = []
 
243
  return "\n".join(formatted_lines)
244
 
245
  # Function to paraphrase and correct grammar with enhanced accuracy and retain structure
246
+ def paraphrase_and_correct(text):
247
+ # Retain the structure (headings, paragraphs, line breaks)
248
  structured_text = retain_structure(text)
249
 
250
+ # Remove meaningless or redundant words first
251
+ cleaned_text = remove_redundant_words(structured_text)
252
+
253
+ # Capitalize sentences and nouns
254
+ paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
255
 
256
+ # Ensure first letter of each sentence is capitalized
 
 
257
  paraphrased_text = force_first_letter_capital(paraphrased_text)
258
+
259
+ # Handle possessives properly
260
  paraphrased_text = handle_possessives(paraphrased_text)
261
+
262
+ # Apply grammatical corrections
263
  paraphrased_text = correct_article_errors(paraphrased_text)
264
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
265
  paraphrased_text = correct_tense_errors(paraphrased_text)
266
  paraphrased_text = correct_double_negatives(paraphrased_text)
267
  paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
268
+
269
+ # Rephrase with synonyms while maintaining grammatical forms
270
+ paraphrased_text = rephrase_with_synonyms(paraphrased_text)
271
+
272
+ # Correct spelling errors
273
  paraphrased_text = correct_spelling(paraphrased_text)
274
+
275
  return paraphrased_text
276
 
277
  # Gradio app setup with two tabs
 
291
  result2 = gr.Textbox(lines=5, label='Corrected Text')
292
 
293
  # Connect the paraphrasing and correction function to the button
294
+ button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
295
 
296
+ demo.launch(share=True) # Share=True to create a public link