sashtech committed on
Commit
7c41997
·
verified ·
1 Parent(s): 19c632c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -57
app.py CHANGED
@@ -7,12 +7,14 @@ import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
 
10
 
11
  # Initialize the English text classification pipeline for AI detection
12
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
13
 
14
  # Initialize the spell checker
15
  spell = SpellChecker()
 
16
 
17
  # Ensure necessary NLTK data is downloaded
18
  nltk.download('wordnet')
@@ -35,7 +37,7 @@ def get_synonyms_nltk(word, pos):
35
  synsets = wordnet.synsets(word, pos=pos)
36
  if synsets:
37
  lemmas = synsets[0].lemmas()
38
- return [lemma.name() for lemma in lemmas]
39
  return []
40
 
41
  # Function to remove redundant and meaningless words
@@ -68,14 +70,14 @@ def correct_tense_errors(text):
68
  doc = nlp(text)
69
  corrected_text = []
70
  for token in doc:
71
- if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
72
  lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
73
  corrected_text.append(lemma)
74
  else:
75
  corrected_text.append(token.text)
76
  return ' '.join(corrected_text)
77
 
78
- # Function to correct singular/plural errors
79
  def correct_singular_plural_errors(text):
80
  doc = nlp(text)
81
  corrected_text = []
@@ -84,12 +86,12 @@ def correct_singular_plural_errors(text):
84
  if token.pos_ == "NOUN":
85
  if token.tag_ == "NN": # Singular noun
86
  if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
87
- corrected_text.append(token.lemma_ + 's')
88
  else:
89
  corrected_text.append(token.text)
90
  elif token.tag_ == "NNS": # Plural noun
91
  if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
92
- corrected_text.append(token.lemma_)
93
  else:
94
  corrected_text.append(token.text)
95
  else:
@@ -116,26 +118,23 @@ def correct_article_errors(text):
116
 
117
  # Function to get the correct synonym while maintaining verb form
118
  def replace_with_synonym(token):
119
- pos = None
120
- if token.pos_ == "VERB":
121
- pos = wordnet.VERB
122
- elif token.pos_ == "NOUN":
123
- pos = wordnet.NOUN
124
- elif token.pos_ == "ADJ":
125
- pos = wordnet.ADJ
126
- elif token.pos_ == "ADV":
127
- pos = wordnet.ADV
128
-
129
  synonyms = get_synonyms_nltk(token.lemma_, pos)
130
-
131
  if synonyms:
132
  synonym = synonyms[0]
133
- if token.tag_ == "VBG": # Present participle (e.g., running)
134
- synonym = synonym + 'ing'
135
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
136
- synonym = synonym + 'ed'
137
  elif token.tag_ == "VBZ": # Third-person singular present
138
- synonym = synonym + 's'
139
  return synonym
140
  return token.text
141
 
@@ -155,12 +154,12 @@ def ensure_subject_verb_agreement(text):
155
  doc = nlp(text)
156
  corrected_text = []
157
  for token in doc:
 
158
  if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
159
  if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
160
- corrected_text.append(token.head.lemma_ + "s")
161
  elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
162
- corrected_text.append(token.head.lemma_)
163
- corrected_text.append(token.text)
164
  return ' '.join(corrected_text)
165
 
166
  # Function to correct spelling errors
@@ -193,27 +192,24 @@ def rephrase_with_synonyms(text):
193
  rephrased_text.append("Earth")
194
  continue
195
 
196
- pos_tag = None
197
- if token.pos_ == "NOUN":
198
- pos_tag = wordnet.NOUN
199
- elif token.pos_ == "VERB":
200
- pos_tag = wordnet.VERB
201
- elif token.pos_ == "ADJ":
202
- pos_tag = wordnet.ADJ
203
- elif token.pos_ == "ADV":
204
- pos_tag = wordnet.ADV
205
 
206
  if pos_tag:
207
  synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
208
  if synonyms:
209
  synonym = synonyms[0] # Just using the first synonym for simplicity
210
  if token.pos_ == "VERB":
211
- if token.tag_ == "VBG": # Present participle (e.g., running)
212
- synonym = synonym + 'ing'
213
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
214
- synonym = synonym + 'ed'
215
  elif token.tag_ == "VBZ": # Third-person singular present
216
- synonym = synonym + 's'
217
  rephrased_text.append(synonym)
218
  else:
219
  rephrased_text.append(token.text)
@@ -234,37 +230,46 @@ def paraphrase_and_correct(text):
234
  paraphrased_text = correct_tense_errors(paraphrased_text)
235
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
236
  paraphrased_text = correct_article_errors(paraphrased_text)
237
- paraphrased_text = correct_double_negatives(paraphrased_text)
238
- paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
239
 
240
- # Correct spelling and punctuation
241
  paraphrased_text = correct_spelling(paraphrased_text)
 
 
242
  paraphrased_text = correct_punctuation(paraphrased_text)
243
- paraphrased_text = handle_possessives(paraphrased_text) # Handle possessives
244
 
245
- # Rephrase with synonyms
 
 
 
 
 
 
246
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
247
 
248
- # Force capitalization of the first letter of each sentence
249
- final_text = capitalize_sentences_and_nouns(paraphrased_text)
250
 
251
- return final_text
252
 
253
- # Gradio Interface
254
  def process_text(input_text):
255
  ai_label, ai_score = predict_en(input_text)
256
- corrected_text = paraphrase_and_correct(input_text)
257
- return ai_label, ai_score, corrected_text
 
 
 
 
 
258
 
259
- # Create Gradio interface
260
  iface = gr.Interface(
261
  fn=process_text,
262
- inputs="text",
263
- outputs=["text", "number", "text"],
264
- title="AI Content Detection and Grammar Correction",
265
- description="Enter text to detect AI-generated content and correct grammar."
266
  )
267
 
268
- # Launch the Gradio app
269
- if __name__ == "__main__":
270
- iface.launch()
 
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
  import re
10
+ from inflect import engine # For pluralization
11
 
12
  # Initialize the English text classification pipeline for AI detection
13
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
14
 
15
  # Initialize the spell checker
16
  spell = SpellChecker()
17
+ inflect_engine = engine()
18
 
19
  # Ensure necessary NLTK data is downloaded
20
  nltk.download('wordnet')
 
37
  synsets = wordnet.synsets(word, pos=pos)
38
  if synsets:
39
  lemmas = synsets[0].lemmas()
40
+ return [lemma.name() for lemma in lemmas if lemma.name() != word] # Avoid original word
41
  return []
42
 
43
  # Function to remove redundant and meaningless words
 
70
  doc = nlp(text)
71
  corrected_text = []
72
  for token in doc:
73
+ if token.pos_ == "VERB":
74
  lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
75
  corrected_text.append(lemma)
76
  else:
77
  corrected_text.append(token.text)
78
  return ' '.join(corrected_text)
79
 
80
+ # Function to correct singular/plural errors using inflect
81
  def correct_singular_plural_errors(text):
82
  doc = nlp(text)
83
  corrected_text = []
 
86
  if token.pos_ == "NOUN":
87
  if token.tag_ == "NN": # Singular noun
88
  if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
89
+ corrected_text.append(inflect_engine.plural(token.lemma_))
90
  else:
91
  corrected_text.append(token.text)
92
  elif token.tag_ == "NNS": # Plural noun
93
  if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
94
+ corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
95
  else:
96
  corrected_text.append(token.text)
97
  else:
 
118
 
119
  # Function to get the correct synonym while maintaining verb form
120
  def replace_with_synonym(token):
121
+ pos = {
122
+ "VERB": wordnet.VERB,
123
+ "NOUN": wordnet.NOUN,
124
+ "ADJ": wordnet.ADJ,
125
+ "ADV": wordnet.ADV
126
+ }.get(token.pos_, None)
127
+
 
 
 
128
  synonyms = get_synonyms_nltk(token.lemma_, pos)
129
+
130
  if synonyms:
131
  synonym = synonyms[0]
132
+ if token.tag_ == "VBG": # Present participle
133
+ synonym += 'ing'
134
+ elif token.tag_ in {"VBD", "VBN"}: # Past tense or past participle
135
+ synonym += 'ed'
136
  elif token.tag_ == "VBZ": # Third-person singular present
137
+ synonym += 's'
138
  return synonym
139
  return token.text
140
 
 
154
  doc = nlp(text)
155
  corrected_text = []
156
  for token in doc:
157
+ corrected_text.append(token.text)
158
  if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
159
  if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
160
+ corrected_text[-1] = token.head.lemma_ + "s"
161
  elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
162
+ corrected_text[-1] = token.head.lemma_
 
163
  return ' '.join(corrected_text)
164
 
165
  # Function to correct spelling errors
 
192
  rephrased_text.append("Earth")
193
  continue
194
 
195
+ pos_tag = {
196
+ "NOUN": wordnet.NOUN,
197
+ "VERB": wordnet.VERB,
198
+ "ADJ": wordnet.ADJ,
199
+ "ADV": wordnet.ADV
200
+ }.get(token.pos_, None)
 
 
 
201
 
202
  if pos_tag:
203
  synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
204
  if synonyms:
205
  synonym = synonyms[0] # Just using the first synonym for simplicity
206
  if token.pos_ == "VERB":
207
+ if token.tag_ == "VBG": # Present participle
208
+ synonym += 'ing'
209
+ elif token.tag_ in {"VBD", "VBN"}: # Past tense or past participle
210
+ synonym += 'ed'
211
  elif token.tag_ == "VBZ": # Third-person singular present
212
+ synonym += 's'
213
  rephrased_text.append(synonym)
214
  else:
215
  rephrased_text.append(token.text)
 
230
  paraphrased_text = correct_tense_errors(paraphrased_text)
231
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
232
  paraphrased_text = correct_article_errors(paraphrased_text)
 
 
233
 
234
+ # Correct spelling errors
235
  paraphrased_text = correct_spelling(paraphrased_text)
236
+
237
+ # Correct punctuation issues
238
  paraphrased_text = correct_punctuation(paraphrased_text)
 
239
 
240
+ # Handle possessives
241
+ paraphrased_text = handle_possessives(paraphrased_text)
242
+
243
+ # Ensure subject-verb agreement
244
+ paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
245
+
246
+ # Replace with synonyms
247
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
248
 
249
+ # Correct for double negatives
250
+ paraphrased_text = correct_double_negatives(paraphrased_text)
251
 
252
+ return paraphrased_text
253
 
254
+ # Function to handle the user interface
255
  def process_text(input_text):
256
  ai_label, ai_score = predict_en(input_text)
257
+ ai_result = f"AI Detected: {ai_label} (Score: {ai_score:.2f})"
258
+
259
+ if ai_label == "HUMAN":
260
+ corrected_text = paraphrase_and_correct(input_text)
261
+ return corrected_text, ai_result
262
+ else:
263
+ return "The text seems to be AI-generated; no correction applied.", ai_result
264
 
265
+ # Gradio interface
266
  iface = gr.Interface(
267
  fn=process_text,
268
+ inputs=gr.Textbox(lines=10, placeholder="Enter your text here..."),
269
+ outputs=[gr.Textbox(label="Corrected Text"), gr.Textbox(label="AI Detection Result")],
270
+ title="Text Correction and AI Detection",
271
+ description="This app corrects grammar, spelling, and punctuation while also detecting AI-generated content."
272
  )
273
 
274
+ # Launch the interface
275
+ iface.launch()