sashtech committed
Commit fa69dbc · verified · 1 Parent(s): 42515fd

Update app.py

Files changed (1)
  1. app.py +21 -203
app.py CHANGED
@@ -1,66 +1,25 @@
-import os
-import gradio as gr
-from transformers import pipeline
-import spacy
-import nltk
-from nltk.corpus import wordnet
-from spellchecker import SpellChecker
-import re
-import inflect
-
-# Initialize components
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    print("Downloading spaCy model...")
-    spacy.cli.download("en_core_web_sm")
-    nlp = spacy.load("en_core_web_sm")
-
-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-
-# Initialize the spell checker
-spell = SpellChecker()
-
-# Initialize the inflect engine for pluralization
-inflect_engine = inflect.engine()
-
-# Ensure necessary NLTK data is downloaded
-nltk.download('wordnet', quiet=True)
-nltk.download('omw-1.4', quiet=True)
-
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
-
-def get_synonyms_nltk(word, pos):
-    synsets = wordnet.synsets(word, pos=pos)
-    if synsets:
-        lemmas = synsets[0].lemmas()
-        return [lemma.name() for lemma in lemmas if lemma.name() != word]
-    return []
-
+# Added more redundant/filler words
 def remove_redundant_words(text):
     doc = nlp(text)
-    meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
+    meaningless_words = {"actually", "basically", "literally", "really", "very", "just", "quite", "rather", "simply", "that", "kind of", "sort of", "you know", "honestly", "seriously"}
     filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
     return ' '.join(filtered_text)
 
+# Capitalize sentences and proper nouns
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
-
     for sent in doc.sents:
         sentence = []
         for token in sent:
             if token.i == sent.start or token.pos_ == "PROPN":
                 sentence.append(token.text.capitalize())
             else:
-                sentence.append(token.text)
+                sentence.append(token.text.lower())
         corrected_text.append(' '.join(sentence))
-
     return ' '.join(corrected_text)
 
+# Function to dynamically correct tenses and verb forms
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -72,47 +31,7 @@ def correct_tense_errors(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-def correct_singular_plural_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-
-    for token in doc:
-        if token.pos_ == "NOUN":
-            if token.tag_ == "NN" and any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
-                corrected_text.append(inflect_engine.plural(token.lemma_))
-            elif token.tag_ == "NNS" and any(child.text.lower() in ['a', 'one'] for child in token.head.children):
-                corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
-            else:
-                corrected_text.append(token.text)
-        else:
-            corrected_text.append(token.text)
-
-    return ' '.join(corrected_text)
-
-def correct_article_errors(text):
-    doc = nlp(text)
-    corrected_text = []
-    for i, token in enumerate(doc):
-        if token.text.lower() in ['a', 'an']:
-            next_token = doc[i + 1] if i + 1 < len(doc) else None
-            if next_token and next_token.text[0].lower() in "aeiou":
-                corrected_text.append("an")
-            else:
-                corrected_text.append("a")
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
-
-def correct_double_negatives(text):
-    doc = nlp(text)
-    corrected_text = []
-    for token in doc:
-        if token.dep_ == "neg" and any(child.dep_ == "neg" for child in token.head.children):
-            continue
-        else:
-            corrected_text.append(token.text)
-    return ' '.join(corrected_text)
-
+# Enhanced function to handle subject-verb agreement
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
@@ -128,119 +47,28 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 
-def enhanced_spell_check(text):
-    words = text.split()
-    corrected_words = []
-    for word in words:
-        if '_' in word:
-            sub_words = word.split('_')
-            corrected_sub_words = [spell.correction(w) or w for w in sub_words]
-            corrected_words.append('_'.join(corrected_sub_words))
-        else:
-            corrected_word = spell.correction(word) or word
-            corrected_words.append(corrected_word)
-    return ' '.join(corrected_words)
-
-def correct_semantic_errors(text):
-    semantic_corrections = {
-        "animate_being": "animal",
-        "little": "smallest",
-        "big": "largest",
-        "mammalian": "mammals",
-        "universe": "world",
-        "manner": "ways",
-        "continue": "preserve",
-        "dirt": "soil",
-        "wellness": "health",
-        "modulate": "regulate",
-        "clime": "climate",
-        "function": "role",
-        "keeping": "maintaining",
-        "lend": "contribute",
-        "better": "improve",
-        "cardinal": "key",
-        "expeditiously": "efficiently",
-        "marauder": "predator",
-        "quarry": "prey",
-        "forestalling": "preventing",
-        "bend": "turn",
-        "works": "plant",
-        "croping": "grazing",
-        "flora": "vegetation",
-        "dynamical": "dynamic",
-        "alteration": "change",
-        "add-on": "addition",
-        "indispensable": "essential",
-        "nutrient": "food",
-        "harvest": "crops",
-        "pollenateing": "pollinating",
-        "divers": "diverse",
-        "beginning": "source",
-        "homo": "humans",
-        "fall_in": "collapse",
-        "takeing": "leading",
-        "coinage": "species",
-        "trust": "rely",
-        "angleworm": "earthworm",
-        "interrupt": "break",
-        "affair": "matter",
-        "air_out": "aerate",
-        "alimentary": "nutrient",
-        "distributeed": "spread",
-        "country": "areas",
-        "reconstruct": "restore",
-        "debauched": "degraded",
-        "giant": "whales",
-        "organic_structure": "bodies",
-        "decease": "die",
-        "carcase": "carcasses",
-        "pin_downing": "trapping",
-        "cut_downs": "reduces",
-        "ambiance": "atmosphere",
-        "extenuateing": "mitigating",
-        "decision": "conclusion",
-        "doing": "making",
-        "prolongs": "sustains",
-        "home_ground": "habitats",
-        "continueing": "preserving",
-        "populateing": "living",
-        "beingness": "beings"
-    }
-
-    words = text.split()
-    corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
-    return ' '.join(corrected_words)
-
-def enhance_punctuation(text):
-    text = re.sub(r'\s+([?.!,";:])', r'\1', text)
-    text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)
-    text = re.sub(r'\s*"\s*', '" ', text).strip()
-    text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
-    text = re.sub(r'([a-z])\s+([A-Z])', r'\1. \2', text)
-    return text
-
+# Ensure proper apostrophe usage and possessives
 def correct_apostrophes(text):
-    text = re.sub(r"\b(\w+)s\b(?<!\'s)", r"\1's", text)
-    text = re.sub(r"\b(\w+)s'\b", r"\1s'", text)
+    text = re.sub(r"\b(\w+)s\b(?<!\'s)", r"\1's", text)  # Simple apostrophe correction
+    text = re.sub(r"\b(\w+)s'\b", r"\1s'", text)  # Handles plural possessives
     return text
 
-def handle_possessives(text):
-    text = re.sub(r"\b(\w+)'s\b", r"\1's", text)
+# Enhanced punctuation
+def enhance_punctuation(text):
+    text = re.sub(r'\s+([?.!,";:])', r'\1', text)  # Remove extra space before punctuation
+    text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)  # Add space after punctuation if needed
     return text
 
+# Paraphrasing using synonyms and correcting semantic errors
 def rephrase_with_synonyms(text):
     doc = nlp(text)
     rephrased_text = []
 
     for token in doc:
-        if token.text.lower() == "earth":
-            rephrased_text.append("Earth")
-            continue
-
         pos_tag = None
         if token.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
             pos_tag = getattr(wordnet, token.pos_)
-
+
         if pos_tag:
             synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
            if synonyms:
@@ -260,9 +88,9 @@ def rephrase_with_synonyms(text):
 
     return ' '.join(rephrased_text)
 
+# Comprehensive text correction
 def paraphrase_and_correct(text):
     text = enhanced_spell_check(text)
-    text = correct_semantic_errors(text)
     text = remove_redundant_words(text)
     text = capitalize_sentences_and_nouns(text)
     text = correct_tense_errors(text)
@@ -270,32 +98,22 @@ def paraphrase_and_correct(text):
     text = correct_article_errors(text)
     text = enhance_punctuation(text)
     text = correct_apostrophes(text)
-    text = handle_possessives(text)
     text = rephrase_with_synonyms(text)
     text = correct_double_negatives(text)
     text = ensure_subject_verb_agreement(text)
-    text = ' '.join(word.capitalize() if word.lower() in ['i', 'earth'] else word for word in text.split())
     return text
 
-def detect_ai(text):
-    label, score = predict_en(text)
-    return label, score
-
+# Integrate with Gradio UI
 def gradio_interface(text):
-    label, score = detect_ai(text)
     corrected_text = paraphrase_and_correct(text)
-    return {label: score}, corrected_text
+    return corrected_text
 
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
-    outputs=[
-        gr.Label(num_top_classes=1),
-        gr.Textbox(label="Corrected Text")
-    ],
-    title="AI Detection and Grammar Correction",
-    description="Detect AI-generated content and correct grammar issues."
+    outputs=[gr.Textbox(label="Corrected Text")],
+    title="Grammar & Semantic Error Correction",
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
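Note (not part of the commit): the retained code still references nlp, re, wordnet, and gr, yet the import-and-setup block is among the deleted lines, and helpers still called from the surviving functions (get_synonyms_nltk, enhanced_spell_check, correct_article_errors, correct_double_negatives) are deleted as well. A minimal sketch of the setup the trimmed app.py would presumably still need, reconstructed from the removed lines above:

    import re
    import nltk
    import spacy
    import gradio as gr
    from nltk.corpus import wordnet
    from spellchecker import SpellChecker

    # One-time data and model setup, as in the removed header
    nltk.download('wordnet', quiet=True)
    nltk.download('omw-1.4', quiet=True)
    nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used by every correction pass
    spell = SpellChecker()              # still used by enhanced_spell_check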
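For reference, the apostrophe rule kept in this commit turns any word ending in s into a possessive: the lookbehind (?<!\'s) can never reject a match, because the character before the final s was consumed by \w+ and so is never an apostrophe. A standalone check using only the standard-library re module:

    import re

    def correct_apostrophes(text):
        text = re.sub(r"\b(\w+)s\b(?<!\'s)", r"\1's", text)  # Simple apostrophe correction
        text = re.sub(r"\b(\w+)s'\b", r"\1s'", text)         # Handles plural possessives
        return text

    print(correct_apostrophes("the dogs bone"))  # -> "the dog's bone"
    print(correct_apostrophes("three dogs"))     # -> "three dog's" (plain plurals match too)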
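The slimmed-down enhance_punctuation keeps only the two spacing rules, so its behavior is easy to verify in isolation; a small sketch with the same two substitutions:

    import re

    def enhance_punctuation(text):
        text = re.sub(r'\s+([?.!,";:])', r'\1', text)      # Remove extra space before punctuation
        text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)  # Add space after punctuation if needed
        return text

    print(enhance_punctuation('Hello ,world !How are you?'))
    # -> 'Hello, world! How are you?'

With the detector removed, gradio_interface returns a single string, so the app is one textbox in, one textbox out, and iface.launch() serves it locally as before.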