sashtech committed
Commit f18ad55 · verified · 1 Parent(s): 00cd115

Update app.py

Files changed (1)
  1. app.py +17 -190
app.py CHANGED
@@ -12,7 +12,7 @@ from inflect import engine # For pluralization
  # Initialize the English text classification pipeline for AI detection
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

- # Initialize the spell checker
+ # Initialize the spell checker and inflect engine
  spell = SpellChecker()
  inflect_engine = engine()

@@ -27,7 +27,7 @@ except OSError:
      subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
      nlp = spacy.load("en_core_web_sm")

- # Function to predict the label and score for English text (AI Detection)
+ # Function to predict AI detection
  def predict_en(text):
      res = pipeline_en(text)[0]
      return res['label'], res['score']
@@ -40,158 +40,23 @@ def get_synonyms_nltk(word, pos):
          return [lemma.name() for lemma in lemmas if lemma.name() != word] # Avoid original word
      return []

- # Function to remove redundant and meaningless words
+ # Function to remove redundant words
  def remove_redundant_words(text):
-     doc = nlp(text)
      meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
-     filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
-     return ' '.join(filtered_text)
+     return ' '.join(word for word in text.split() if word.lower() not in meaningless_words)
-
- # Function to capitalize the first letter of sentences and proper nouns
- def capitalize_sentences_and_nouns(text):
-     doc = nlp(text)
-     corrected_text = []
-
-     for sent in doc.sents:
-         sentence = []
-         for token in sent:
-             if token.i == sent.start: # First word of the sentence
-                 sentence.append(token.text.capitalize())
-             elif token.pos_ == "PROPN": # Proper noun
-                 sentence.append(token.text.capitalize())
-             else:
-                 sentence.append(token.text)
-         corrected_text.append(' '.join(sentence))
-
-     return ' '.join(corrected_text)
-
- # Function to correct tense errors in a sentence
- def correct_tense_errors(text):
-     doc = nlp(text)
-     corrected_text = []
-     for token in doc:
-         if token.pos_ == "VERB":
-             lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
-             corrected_text.append(lemma)
-         else:
-             corrected_text.append(token.text)
-     return ' '.join(corrected_text)
-
- # Function to correct singular/plural errors using inflect
- def correct_singular_plural_errors(text):
-     doc = nlp(text)
-     corrected_text = []
-
-     for token in doc:
-         if token.pos_ == "NOUN":
-             if token.tag_ == "NN": # Singular noun
-                 if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
-                     corrected_text.append(inflect_engine.plural(token.lemma_))
-                 else:
-                     corrected_text.append(token.text)
-             elif token.tag_ == "NNS": # Plural noun
-                 if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
-                     corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
-                 else:
-                     corrected_text.append(token.text)
-         else:
-             corrected_text.append(token.text)
-
-     return ' '.join(corrected_text)
-
- # Function to check and correct article errors
- def correct_article_errors(text):
-     doc = nlp(text)
-     corrected_text = []
-     for token in doc:
-         if token.text in ['a', 'an']:
-             next_token = token.nbor(1)
-             if token.text == "a" and next_token.text[0].lower() in "aeiou":
-                 corrected_text.append("an")
-             elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
-                 corrected_text.append("a")
-             else:
-                 corrected_text.append(token.text)
-         else:
-             corrected_text.append(token.text)
-     return ' '.join(corrected_text)
-
- # Function to get the correct synonym while maintaining verb form
- def replace_with_synonym(token):
-     pos = {
-         "VERB": wordnet.VERB,
-         "NOUN": wordnet.NOUN,
-         "ADJ": wordnet.ADJ,
-         "ADV": wordnet.ADV
-     }.get(token.pos_, None)
-
-     synonyms = get_synonyms_nltk(token.lemma_, pos)
-
-     if synonyms:
-         synonym = synonyms[0]
-         if token.tag_ == "VBG": # Present participle
-             synonym += 'ing'
-         elif token.tag_ in {"VBD", "VBN"}: # Past tense or past participle
-             synonym += 'ed'
-         elif token.tag_ == "VBZ": # Third-person singular present
-             synonym += 's'
-         return synonym
-     return token.text
-
- # Function to check for and avoid double negatives
- def correct_double_negatives(text):
-     doc = nlp(text)
-     corrected_text = []
-     for token in doc:
-         if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
-             corrected_text.append("always")
-         else:
-             corrected_text.append(token.text)
-     return ' '.join(corrected_text)
-
- # Function to ensure subject-verb agreement
- def ensure_subject_verb_agreement(text):
-     doc = nlp(text)
-     corrected_text = []
-     for token in doc:
-         corrected_text.append(token.text)
-         if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
-             if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
-                 corrected_text[-1] = token.head.lemma_ + "s"
-             elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
-                 corrected_text[-1] = token.head.lemma_
-     return ' '.join(corrected_text)

  # Function to correct spelling errors
  def correct_spelling(text):
      words = text.split()
-     corrected_words = []
-     for word in words:
-         corrected_word = spell.correction(word)
-         corrected_words.append(corrected_word if corrected_word else word) # Keep original if correction is None
+     corrected_words = [spell.correction(word) for word in words]
      return ' '.join(corrected_words)

- # Function to correct punctuation issues
- def correct_punctuation(text):
-     text = re.sub(r'\s+([?.!,";:])', r'\1', text) # Remove space before punctuation
-     text = re.sub(r'([?.!,";:])\s+', r'\1 ', text) # Ensure a single space after punctuation
-     return text
-
- # Function to ensure correct handling of possessive forms
- def handle_possessives(text):
-     text = re.sub(r"\b(\w+)'s\b", r"\1's", text) # Preserve possessive forms
-     return text
-
- # Function to rephrase text and replace words with their synonyms while maintaining form
+ # Function to rephrase text with synonyms
  def rephrase_with_synonyms(text):
      doc = nlp(text)
      rephrased_text = []

      for token in doc:
-         if token.pos_ == "NOUN" and token.text.lower() == "earth":
-             rephrased_text.append("Earth")
-             continue
-
          pos_tag = {
              "NOUN": wordnet.NOUN,
              "VERB": wordnet.VERB,
@@ -201,74 +66,36 @@ def rephrase_with_synonyms(text):

          if pos_tag:
              synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
-             if synonyms:
-                 synonym = synonyms[0] # Just using the first synonym for simplicity
-                 if token.pos_ == "VERB":
-                     if token.tag_ == "VBG": # Present participle
-                         synonym += 'ing'
-                     elif token.tag_ in {"VBD", "VBN"}: # Past tense or past participle
-                         synonym += 'ed'
-                     elif token.tag_ == "VBZ": # Third-person singular present
-                         synonym += 's'
-                 rephrased_text.append(synonym)
-             else:
-                 rephrased_text.append(token.text)
+             synonym = synonyms[0] if synonyms else token.text
+             rephrased_text.append(synonym)
          else:
              rephrased_text.append(token.text)

      return ' '.join(rephrased_text)

- # Function to paraphrase and correct grammar with enhanced accuracy
+ # Function to paraphrase and correct grammar
  def paraphrase_and_correct(text):
-     # Remove meaningless or redundant words first
      cleaned_text = remove_redundant_words(text)
-
-     # Capitalize sentences and nouns
-     paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
-
-     # Correct tense and singular/plural errors
-     paraphrased_text = correct_tense_errors(paraphrased_text)
-     paraphrased_text = correct_singular_plural_errors(paraphrased_text)
-     paraphrased_text = correct_article_errors(paraphrased_text)
-
-     # Correct spelling errors
-     paraphrased_text = correct_spelling(paraphrased_text)
-
-     # Correct punctuation issues
-     paraphrased_text = correct_punctuation(paraphrased_text)
-
-     # Handle possessives
-     paraphrased_text = handle_possessives(paraphrased_text)
-
-     # Ensure subject-verb agreement
-     paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
-
-     # Replace with synonyms
-     paraphrased_text = rephrase_with_synonyms(paraphrased_text)
-
-     # Correct for double negatives
-     paraphrased_text = correct_double_negatives(paraphrased_text)
-
-     return paraphrased_text
+     cleaned_text = correct_spelling(cleaned_text)
+     return rephrase_with_synonyms(cleaned_text)

- # Function to handle the user interface
+ # Function to handle user input
  def process_text(input_text):
      ai_label, ai_score = predict_en(input_text)
-     ai_result = f"AI Detected: {ai_label} (Score: {ai_score:.2f})"

      if ai_label == "HUMAN":
          corrected_text = paraphrase_and_correct(input_text)
-         return corrected_text, ai_result
+         return corrected_text
      else:
-         return "The text seems to be AI-generated; no correction applied.", ai_result
+         return "The text seems to be AI-generated; no correction applied."

  # Gradio interface
  iface = gr.Interface(
      fn=process_text,
      inputs=gr.Textbox(lines=10, placeholder="Enter your text here..."),
-     outputs=[gr.Textbox(label="Corrected Text"), gr.Textbox(label="AI Detection Result")],
-     title="Text Correction and AI Detection",
-     description="This app corrects grammar, spelling, and punctuation while also detecting AI-generated content."
+     outputs=gr.Textbox(label="Corrected Text"),
+     title="Text Correction and Rephrasing",
+     description="This app corrects and rephrases text while detecting AI-generated content."
  )

  # Launch the interface
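
One behavioural note on the slimmed-down correct_spelling: the removed loop kept the original word whenever spell.correction(word) returned None (the old inline comment said as much), while the new list comprehension passes the raw return value straight to ' '.join(), which raises a TypeError if any correction comes back None. A minimal None-safe sketch, assuming the usual pyspellchecker import used by app.py; this is illustrative only and not part of the commit:

from spellchecker import SpellChecker

spell = SpellChecker()

def correct_spelling_safe(text):
    # Mirror the guard this commit removed: keep the original word when
    # spell.correction() returns None for an out-of-vocabulary token.
    words = text.split()
    corrected_words = [spell.correction(word) or word for word in words]
    return ' '.join(corrected_words)

# Example: unknown tokens such as names pass through unchanged.
print(correct_spelling_safe("Ths sentense was typed by sashtech"))

The "or word" fallback preserves tokens the dictionary does not know (names, URLs, code identifiers) instead of failing on them, matching the behaviour of the pre-commit code.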
 