sashtech committed on
Commit
42515fd
·
verified ·
1 Parent(s): 353216c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -113
app.py CHANGED
@@ -26,15 +26,13 @@ spell = SpellChecker()
26
  inflect_engine = inflect.engine()
27
 
28
  # Ensure necessary NLTK data is downloaded
29
- nltk.download('wordnet')
30
- nltk.download('omw-1.4')
31
 
32
- # Function to predict the label and score for English text (AI Detection)
33
  def predict_en(text):
34
  res = pipeline_en(text)[0]
35
  return res['label'], res['score']
36
 
37
- # Function to get synonyms using NLTK WordNet
38
  def get_synonyms_nltk(word, pos):
39
  synsets = wordnet.synsets(word, pos=pos)
40
  if synsets:
@@ -42,14 +40,12 @@ def get_synonyms_nltk(word, pos):
42
  return [lemma.name() for lemma in lemmas if lemma.name() != word]
43
  return []
44
 
45
- # Function to remove redundant and meaningless words
46
  def remove_redundant_words(text):
47
  doc = nlp(text)
48
  meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
49
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
50
  return ' '.join(filtered_text)
51
 
52
- # Function to capitalize the first letter of sentences and proper nouns
53
  def capitalize_sentences_and_nouns(text):
54
  doc = nlp(text)
55
  corrected_text = []
@@ -57,9 +53,7 @@ def capitalize_sentences_and_nouns(text):
57
  for sent in doc.sents:
58
  sentence = []
59
  for token in sent:
60
- if token.i == sent.start: # First word of the sentence
61
- sentence.append(token.text.capitalize())
62
- elif token.pos_ == "PROPN": # Proper noun
63
  sentence.append(token.text.capitalize())
64
  else:
65
  sentence.append(token.text)
@@ -67,7 +61,6 @@ def capitalize_sentences_and_nouns(text):
67
 
68
  return ' '.join(corrected_text)
69
 
70
- # Function to correct tense errors in a sentence
71
  def correct_tense_errors(text):
72
  doc = nlp(text)
73
  corrected_text = []
@@ -79,109 +72,75 @@ def correct_tense_errors(text):
79
  corrected_text.append(token.text)
80
  return ' '.join(corrected_text)
81
 
82
- # Function to correct singular/plural errors
83
  def correct_singular_plural_errors(text):
84
  doc = nlp(text)
85
  corrected_text = []
86
 
87
  for token in doc:
88
  if token.pos_ == "NOUN":
89
- if token.tag_ == "NN": # Singular noun
90
- if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
91
- corrected_text.append(inflect_engine.plural(token.lemma_))
92
- else:
93
- corrected_text.append(token.text)
94
- elif token.tag_ == "NNS": # Plural noun
95
- if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
96
- corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
97
- else:
98
- corrected_text.append(token.text)
99
  else:
100
  corrected_text.append(token.text)
101
 
102
  return ' '.join(corrected_text)
103
 
104
- # Function to check and correct article errors
105
  def correct_article_errors(text):
106
  doc = nlp(text)
107
  corrected_text = []
108
- for token in doc:
109
- if token.text in ['a', 'an']:
110
- next_token = token.nbor(1)
111
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
112
  corrected_text.append("an")
113
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
114
- corrected_text.append("a")
115
  else:
116
- corrected_text.append(token.text)
117
  else:
118
  corrected_text.append(token.text)
119
  return ' '.join(corrected_text)
120
 
121
- # Function to get the correct synonym while maintaining verb form
122
- def replace_with_synonym(token):
123
- pos = None
124
- if token.pos_ == "VERB":
125
- pos = wordnet.VERB
126
- elif token.pos_ == "NOUN":
127
- pos = wordnet.NOUN
128
- elif token.pos_ == "ADJ":
129
- pos = wordnet.ADJ
130
- elif token.pos_ == "ADV":
131
- pos = wordnet.ADV
132
-
133
- synonyms = get_synonyms_nltk(token.lemma_, pos)
134
-
135
- if synonyms:
136
- synonym = synonyms[0]
137
- if token.tag_ == "VBG": # Present participle (e.g., running)
138
- synonym = synonym + 'ing'
139
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
140
- synonym = synonym + 'ed'
141
- elif token.tag_ == "VBZ": # Third-person singular present
142
- synonym = synonym + 's'
143
- return synonym
144
- return token.text
145
-
146
- # Function to check for and avoid double negatives
147
  def correct_double_negatives(text):
148
  doc = nlp(text)
149
  corrected_text = []
150
  for token in doc:
151
- if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
152
- corrected_text.append("always")
153
  else:
154
  corrected_text.append(token.text)
155
  return ' '.join(corrected_text)
156
 
157
- # Function to ensure subject-verb agreement
158
  def ensure_subject_verb_agreement(text):
159
  doc = nlp(text)
160
  corrected_text = []
161
  for token in doc:
162
  if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
163
- if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
164
  corrected_text.append(token.head.lemma_ + "s")
165
- elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
166
  corrected_text.append(token.head.lemma_)
167
- corrected_text.append(token.text)
 
 
 
168
  return ' '.join(corrected_text)
169
 
170
- # Enhance the spell checker function
171
  def enhanced_spell_check(text):
172
  words = text.split()
173
  corrected_words = []
174
  for word in words:
175
- if '_' in word: # Handle cases like 'animate_being'
176
  sub_words = word.split('_')
177
- corrected_sub_words = [spell.correction(w) for w in sub_words]
178
  corrected_words.append('_'.join(corrected_sub_words))
179
  else:
180
- corrected_word = spell.correction(word)
181
- corrected_words.append(corrected_word if corrected_word else word)
182
  return ' '.join(corrected_words)
183
 
184
- # Function to correct common semantic errors
185
  def correct_semantic_errors(text):
186
  semantic_corrections = {
187
  "animate_being": "animal",
@@ -199,69 +158,99 @@ def correct_semantic_errors(text):
199
  "keeping": "maintaining",
200
  "lend": "contribute",
201
  "better": "improve",
202
- "is": "s",
203
- "wite": "write",
204
- "alos": "also",
205
- "ads": "as",
206
- "dictuionatr": "dictionary",
207
- "wors": "words"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  }
209
 
210
  words = text.split()
211
  corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
212
  return ' '.join(corrected_words)
213
 
214
- # Enhance the punctuation correction function
215
  def enhance_punctuation(text):
216
- # Remove extra spaces before punctuation
217
  text = re.sub(r'\s+([?.!,";:])', r'\1', text)
218
-
219
- # Add space after punctuation if it's missing
220
  text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)
221
-
222
- # Correct spacing for quotes
223
  text = re.sub(r'\s*"\s*', '" ', text).strip()
224
-
225
- # Ensure proper capitalization after sentence-ending punctuation
226
  text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
227
-
 
 
 
 
 
228
  return text
229
 
230
- # Function to handle possessives
231
  def handle_possessives(text):
232
  text = re.sub(r"\b(\w+)'s\b", r"\1's", text)
233
  return text
234
 
235
- # Function to rephrase text and replace words with their synonyms while maintaining form
236
  def rephrase_with_synonyms(text):
237
  doc = nlp(text)
238
  rephrased_text = []
239
 
240
  for token in doc:
241
- if token.pos_ == "NOUN" and token.text.lower() == "earth":
242
  rephrased_text.append("Earth")
243
  continue
244
 
245
  pos_tag = None
246
- if token.pos_ == "NOUN":
247
- pos_tag = wordnet.NOUN
248
- elif token.pos_ == "VERB":
249
- pos_tag = wordnet.VERB
250
- elif token.pos_ == "ADJ":
251
- pos_tag = wordnet.ADJ
252
- elif token.pos_ == "ADV":
253
- pos_tag = wordnet.ADV
254
 
255
  if pos_tag:
256
  synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
257
  if synonyms:
258
- synonym = synonyms[0] # Just using the first synonym for simplicity
259
  if token.pos_ == "VERB":
260
- if token.tag_ == "VBG": # Present participle (e.g., running)
261
  synonym = synonym + 'ing'
262
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
263
  synonym = synonym + 'ed'
264
- elif token.tag_ == "VBZ": # Third-person singular present
265
  synonym = synonym + 's'
266
  rephrased_text.append(synonym)
267
  else:
@@ -271,40 +260,32 @@ def rephrase_with_synonyms(text):
271
 
272
  return ' '.join(rephrased_text)
273
 
274
- # Function to detect AI-generated content
275
- def detect_ai(text):
276
- label, score = predict_en(text)
277
- return label, score
278
-
279
- # Enhance the paraphrase_and_correct function
280
  def paraphrase_and_correct(text):
281
- # Apply enhanced spell checking
282
  text = enhanced_spell_check(text)
283
-
284
- # Correct semantic errors
285
  text = correct_semantic_errors(text)
286
-
287
- # Apply existing corrections
288
  text = remove_redundant_words(text)
289
  text = capitalize_sentences_and_nouns(text)
290
  text = correct_tense_errors(text)
291
  text = correct_singular_plural_errors(text)
292
  text = correct_article_errors(text)
293
  text = enhance_punctuation(text)
 
294
  text = handle_possessives(text)
295
  text = rephrase_with_synonyms(text)
296
  text = correct_double_negatives(text)
297
  text = ensure_subject_verb_agreement(text)
298
-
299
  return text
300
 
301
- # Gradio interface setup
 
 
 
302
  def gradio_interface(text):
303
  label, score = detect_ai(text)
304
  corrected_text = paraphrase_and_correct(text)
305
  return {label: score}, corrected_text
306
 
307
- # Create Gradio interface
308
  iface = gr.Interface(
309
  fn=gradio_interface,
310
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
@@ -316,5 +297,5 @@ iface = gr.Interface(
316
  description="Detect AI-generated content and correct grammar issues."
317
  )
318
 
319
- # Launch the app
320
- iface.launch()
 
26
  inflect_engine = inflect.engine()
27
 
28
  # Ensure necessary NLTK data is downloaded
29
+ nltk.download('wordnet', quiet=True)
30
+ nltk.download('omw-1.4', quiet=True)
31
 
 
32
  def predict_en(text):
33
  res = pipeline_en(text)[0]
34
  return res['label'], res['score']
35
 
 
36
  def get_synonyms_nltk(word, pos):
37
  synsets = wordnet.synsets(word, pos=pos)
38
  if synsets:
 
40
  return [lemma.name() for lemma in lemmas if lemma.name() != word]
41
  return []
42
 
 
43
  def remove_redundant_words(text):
44
  doc = nlp(text)
45
  meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
46
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
47
  return ' '.join(filtered_text)
48
 
 
49
  def capitalize_sentences_and_nouns(text):
50
  doc = nlp(text)
51
  corrected_text = []
 
53
  for sent in doc.sents:
54
  sentence = []
55
  for token in sent:
56
+ if token.i == sent.start or token.pos_ == "PROPN":
 
 
57
  sentence.append(token.text.capitalize())
58
  else:
59
  sentence.append(token.text)
 
61
 
62
  return ' '.join(corrected_text)
63
 
 
64
  def correct_tense_errors(text):
65
  doc = nlp(text)
66
  corrected_text = []
 
72
  corrected_text.append(token.text)
73
  return ' '.join(corrected_text)
74
 
 
75
  def correct_singular_plural_errors(text):
76
  doc = nlp(text)
77
  corrected_text = []
78
 
79
  for token in doc:
80
  if token.pos_ == "NOUN":
81
+ if token.tag_ == "NN" and any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
82
+ corrected_text.append(inflect_engine.plural(token.lemma_))
83
+ elif token.tag_ == "NNS" and any(child.text.lower() in ['a', 'one'] for child in token.head.children):
84
+ corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
85
+ else:
86
+ corrected_text.append(token.text)
 
 
 
 
87
  else:
88
  corrected_text.append(token.text)
89
 
90
  return ' '.join(corrected_text)
91
 
 
92
  def correct_article_errors(text):
93
  doc = nlp(text)
94
  corrected_text = []
95
+ for i, token in enumerate(doc):
96
+ if token.text.lower() in ['a', 'an']:
97
+ next_token = doc[i + 1] if i + 1 < len(doc) else None
98
+ if next_token and next_token.text[0].lower() in "aeiou":
99
  corrected_text.append("an")
 
 
100
  else:
101
+ corrected_text.append("a")
102
  else:
103
  corrected_text.append(token.text)
104
  return ' '.join(corrected_text)
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def correct_double_negatives(text):
107
  doc = nlp(text)
108
  corrected_text = []
109
  for token in doc:
110
+ if token.dep_ == "neg" and any(child.dep_ == "neg" for child in token.head.children):
111
+ continue
112
  else:
113
  corrected_text.append(token.text)
114
  return ' '.join(corrected_text)
115
 
 
116
  def ensure_subject_verb_agreement(text):
117
  doc = nlp(text)
118
  corrected_text = []
119
  for token in doc:
120
  if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
121
+ if token.tag_ == "NN" and token.head.tag_ != "VBZ":
122
  corrected_text.append(token.head.lemma_ + "s")
123
+ elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
124
  corrected_text.append(token.head.lemma_)
125
+ else:
126
+ corrected_text.append(token.head.text)
127
+ else:
128
+ corrected_text.append(token.text)
129
  return ' '.join(corrected_text)
130
 
 
131
  def enhanced_spell_check(text):
132
  words = text.split()
133
  corrected_words = []
134
  for word in words:
135
+ if '_' in word:
136
  sub_words = word.split('_')
137
+ corrected_sub_words = [spell.correction(w) or w for w in sub_words]
138
  corrected_words.append('_'.join(corrected_sub_words))
139
  else:
140
+ corrected_word = spell.correction(word) or word
141
+ corrected_words.append(corrected_word)
142
  return ' '.join(corrected_words)
143
 
 
144
  def correct_semantic_errors(text):
145
  semantic_corrections = {
146
  "animate_being": "animal",
 
158
  "keeping": "maintaining",
159
  "lend": "contribute",
160
  "better": "improve",
161
+ "cardinal": "key",
162
+ "expeditiously": "efficiently",
163
+ "marauder": "predator",
164
+ "quarry": "prey",
165
+ "forestalling": "preventing",
166
+ "bend": "turn",
167
+ "works": "plant",
168
+ "croping": "grazing",
169
+ "flora": "vegetation",
170
+ "dynamical": "dynamic",
171
+ "alteration": "change",
172
+ "add-on": "addition",
173
+ "indispensable": "essential",
174
+ "nutrient": "food",
175
+ "harvest": "crops",
176
+ "pollenateing": "pollinating",
177
+ "divers": "diverse",
178
+ "beginning": "source",
179
+ "homo": "humans",
180
+ "fall_in": "collapse",
181
+ "takeing": "leading",
182
+ "coinage": "species",
183
+ "trust": "rely",
184
+ "angleworm": "earthworm",
185
+ "interrupt": "break",
186
+ "affair": "matter",
187
+ "air_out": "aerate",
188
+ "alimentary": "nutrient",
189
+ "distributeed": "spread",
190
+ "country": "areas",
191
+ "reconstruct": "restore",
192
+ "debauched": "degraded",
193
+ "giant": "whales",
194
+ "organic_structure": "bodies",
195
+ "decease": "die",
196
+ "carcase": "carcasses",
197
+ "pin_downing": "trapping",
198
+ "cut_downs": "reduces",
199
+ "ambiance": "atmosphere",
200
+ "extenuateing": "mitigating",
201
+ "decision": "conclusion",
202
+ "doing": "making",
203
+ "prolongs": "sustains",
204
+ "home_ground": "habitats",
205
+ "continueing": "preserving",
206
+ "populateing": "living",
207
+ "beingness": "beings"
208
  }
209
 
210
  words = text.split()
211
  corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
212
  return ' '.join(corrected_words)
213
 
 
214
  def enhance_punctuation(text):
 
215
  text = re.sub(r'\s+([?.!,";:])', r'\1', text)
 
 
216
  text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)
 
 
217
  text = re.sub(r'\s*"\s*', '" ', text).strip()
 
 
218
  text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
219
+ text = re.sub(r'([a-z])\s+([A-Z])', r'\1. \2', text)
220
+ return text
221
+
222
def correct_apostrophes(text):
    """Normalize apostrophes without inventing possessives.

    Two safe repairs are applied:

    * a typographic apostrophe between two word characters becomes ``'``;
    * a clitic that was separated from its word by whitespace ("dog 's",
      "do n't", "I 'm") is re-attached.

    Words that merely end in "s" are left untouched; whether such a word is a
    plural, a verb or a possessive cannot be decided by a regular expression.

    Args:
        text: Text to clean up.

    Returns:
        The text with normalized apostrophes.
    """
    text = re.sub(r"(?<=\w)[\u2019\u02bc](?=\w)", "'", text)
    text = re.sub(
        r"(\w)\s+('(?:s|re|ve|ll|d|m)|n't)\b",
        r"\1\2",
        text,
        flags=re.IGNORECASE,
    )
    return text
226
 
 
227
  def handle_possessives(text):
228
  text = re.sub(r"\b(\w+)'s\b", r"\1's", text)
229
  return text
230
 
 
231
  def rephrase_with_synonyms(text):
232
  doc = nlp(text)
233
  rephrased_text = []
234
 
235
  for token in doc:
236
+ if token.text.lower() == "earth":
237
  rephrased_text.append("Earth")
238
  continue
239
 
240
  pos_tag = None
241
+ if token.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
242
+ pos_tag = getattr(wordnet, token.pos_)
 
 
 
 
 
 
243
 
244
  if pos_tag:
245
  synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
246
  if synonyms:
247
+ synonym = synonyms[0]
248
  if token.pos_ == "VERB":
249
+ if token.tag_ == "VBG":
250
  synonym = synonym + 'ing'
251
+ elif token.tag_ in ["VBD", "VBN"]:
252
  synonym = synonym + 'ed'
253
+ elif token.tag_ == "VBZ":
254
  synonym = synonym + 's'
255
  rephrased_text.append(synonym)
256
  else:
 
260
 
261
  return ' '.join(rephrased_text)
262
 
 
 
 
 
 
 
263
  def paraphrase_and_correct(text):
 
264
  text = enhanced_spell_check(text)
 
 
265
  text = correct_semantic_errors(text)
 
 
266
  text = remove_redundant_words(text)
267
  text = capitalize_sentences_and_nouns(text)
268
  text = correct_tense_errors(text)
269
  text = correct_singular_plural_errors(text)
270
  text = correct_article_errors(text)
271
  text = enhance_punctuation(text)
272
+ text = correct_apostrophes(text)
273
  text = handle_possessives(text)
274
  text = rephrase_with_synonyms(text)
275
  text = correct_double_negatives(text)
276
  text = ensure_subject_verb_agreement(text)
277
+ text = ' '.join(word.capitalize() if word.lower() in ['i', 'earth'] else word for word in text.split())
278
  return text
279
 
280
def detect_ai(text):
    """Return the ``(label, score)`` pair that ``predict_en`` produces for ``text``."""
    verdict, confidence = predict_en(text)
    return verdict, confidence
283
+
284
  def gradio_interface(text):
285
  label, score = detect_ai(text)
286
  corrected_text = paraphrase_and_correct(text)
287
  return {label: score}, corrected_text
288
 
 
289
  iface = gr.Interface(
290
  fn=gradio_interface,
291
  inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
 
297
  description="Detect AI-generated content and correct grammar issues."
298
  )
299
 
300
+ if __name__ == "__main__":
301
+ iface.launch()