sashtech committed on
Commit
c5d2e49
·
verified ·
1 Parent(s): 04919b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -186
app.py CHANGED
@@ -2,187 +2,96 @@ import os
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
 
5
  import nltk
6
  from nltk.corpus import wordnet
7
  from spellchecker import SpellChecker
8
- import re
9
- import inflect
10
 
11
- # Initialize components
12
- try:
13
- nlp = spacy.load("en_core_web_sm")
14
- except OSError:
15
- print("Downloading spaCy model...")
16
- spacy.cli.download("en_core_web_sm")
17
- nlp = spacy.load("en_core_web_sm")
18
 
19
  # Initialize the spell checker
20
  spell = SpellChecker()
21
 
22
- # Initialize the inflect engine for pluralization
23
- inflect_engine = inflect.engine()
24
 
25
  # Ensure necessary NLTK data is downloaded
26
- nltk.download('wordnet', quiet=True)
27
- nltk.download('omw-1.4', quiet=True)
 
 
 
 
 
 
 
28
 
29
- # Function to remove redundant/filler words
 
 
 
 
 
30
  def remove_redundant_words(text):
31
  doc = nlp(text)
32
- meaningless_words = {"actually", "basically", "literally", "really", "very", "just", "quite", "rather", "simply",
33
- "that", "kind of", "sort of", "you know", "honestly", "seriously"}
34
  filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
35
  return ' '.join(filtered_text)
36
 
37
- # Function to capitalize sentences and proper nouns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def capitalize_sentences_and_nouns(text):
39
  doc = nlp(text)
40
  corrected_text = []
 
41
  for sent in doc.sents:
42
  sentence = []
43
  for token in sent:
44
- if token.i == sent.start or token.pos_ == "PROPN":
 
 
45
  sentence.append(token.text.capitalize())
46
  else:
47
- sentence.append(token.text.lower())
48
  corrected_text.append(' '.join(sentence))
49
- return ' '.join(corrected_text)
50
 
51
- # Function to correct verb tenses
52
- def correct_tense_errors(text):
53
- doc = nlp(text)
54
- corrected_text = []
55
- for token in doc:
56
- if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
57
- lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
58
- corrected_text.append(lemma)
59
- else:
60
- corrected_text.append(token.text)
61
  return ' '.join(corrected_text)
62
 
63
- # Function to ensure subject-verb agreement
64
- def ensure_subject_verb_agreement(text):
65
- doc = nlp(text)
66
- corrected_text = []
67
- for token in doc:
68
- if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
69
- if token.tag_ == "NN" and token.head.tag_ != "VBZ":
70
- corrected_text.append(token.head.lemma_ + "s")
71
- elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
72
- corrected_text.append(token.head.lemma_)
73
- else:
74
- corrected_text.append(token.head.text)
75
- else:
76
- corrected_text.append(token.text)
77
- return ' '.join(corrected_text)
78
-
79
- # Function to correct apostrophe usage
80
- def correct_apostrophes(text):
81
- text = re.sub(r"\b(\w+)s\b(?<!\'s)", r"\1's", text) # Simple apostrophe correction
82
- text = re.sub(r"\b(\w+)s'\b", r"\1s'", text) # Handles plural possessives
83
- return text
84
-
85
- # Function to enhance punctuation usage
86
- def enhance_punctuation(text):
87
- text = re.sub(r'\s+([?.!,";:])', r'\1', text) # Remove extra space before punctuation
88
- text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text) # Add space after punctuation if needed
89
- text = re.sub(r'\s*"\s*', '" ', text).strip() # Clean up spaces around quotes
90
- text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
91
- text = re.sub(r'([a-z])\s+([A-Z])', r'\1. \2', text) # Ensure sentences start with capitalized words
92
- return text
93
-
94
- # Function to correct semantic errors and replace with more appropriate words
95
- def correct_semantic_errors(text):
96
- semantic_corrections = {
97
- "animate_being": "animal",
98
- "little": "smallest",
99
- "big": "largest",
100
- "mammalian": "mammals",
101
- "universe": "world",
102
- "manner": "ways",
103
- "continue": "preserve",
104
- "dirt": "soil",
105
- "wellness": "health",
106
- "modulate": "regulate",
107
- "clime": "climate",
108
- "function": "role",
109
- "keeping": "maintaining",
110
- "lend": "contribute",
111
- "better": "improve",
112
- "cardinal": "key",
113
- "expeditiously": "efficiently",
114
- "marauder": "predator",
115
- "quarry": "prey",
116
- "forestalling": "preventing",
117
- "bend": "turn",
118
- "works": "plant",
119
- "croping": "grazing",
120
- "flora": "vegetation",
121
- "dynamical": "dynamic",
122
- "alteration": "change",
123
- "add-on": "addition",
124
- "indispensable": "essential",
125
- "nutrient": "food",
126
- "harvest": "crops",
127
- "pollenateing": "pollinating",
128
- "divers": "diverse",
129
- "beginning": "source",
130
- "homo": "humans",
131
- "fall_in": "collapse",
132
- "takeing": "leading",
133
- "coinage": "species",
134
- "trust": "rely",
135
- "angleworm": "earthworm",
136
- "interrupt": "break",
137
- "affair": "matter",
138
- "air_out": "aerate",
139
- "alimentary": "nutrient",
140
- "distributeed": "spread",
141
- "country": "areas",
142
- "reconstruct": "restore",
143
- "debauched": "degraded",
144
- "giant": "whales",
145
- "organic_structure": "bodies",
146
- "decease": "die",
147
- "carcase": "carcasses",
148
- "pin_downing": "trapping",
149
- "cut_downs": "reduces",
150
- "ambiance": "atmosphere",
151
- "extenuateing": "mitigating",
152
- "decision": "conclusion",
153
- "doing": "making",
154
- "prolongs": "sustains",
155
- "home_ground": "habitats",
156
- "continueing": "preserving",
157
- "populateing": "living",
158
- "beingness": "beings"
159
- }
160
-
161
- words = text.split()
162
- corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
163
- return ' '.join(corrected_words)
164
-
165
- # Function to rephrase using synonyms and adjust verb forms
166
  def rephrase_with_synonyms(text):
167
  doc = nlp(text)
168
  rephrased_text = []
169
 
170
  for token in doc:
171
  pos_tag = None
172
- if token.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
173
- pos_tag = getattr(wordnet, token.pos_)
174
-
 
 
 
 
 
 
175
  if pos_tag:
176
- synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
177
  if synonyms:
178
- synonym = synonyms[0]
179
- if token.pos_ == "VERB":
180
- if token.tag_ == "VBG":
181
- synonym = synonym + 'ing'
182
- elif token.tag_ in ["VBD", "VBN"]:
183
- synonym = synonym + 'ed'
184
- elif token.tag_ == "VBZ":
185
- synonym = synonym + 's'
186
  rephrased_text.append(synonym)
187
  else:
188
  rephrased_text.append(token.text)
@@ -191,46 +100,45 @@ def rephrase_with_synonyms(text):
191
 
192
  return ' '.join(rephrased_text)
193
 
194
- # Function to apply enhanced spell check
195
- def enhanced_spell_check(text):
196
- words = text.split()
197
- corrected_words = []
198
- for word in words:
199
- if '_' in word:
200
- sub_words = word.split('_')
201
- corrected_sub_words = [spell.correction(w) or w for w in sub_words]
202
- corrected_words.append('_'.join(corrected_sub_words))
203
- else:
204
- corrected_word = spell.correction(word) or word
205
- corrected_words.append(corrected_word)
206
- return ' '.join(corrected_words)
207
-
208
- # Comprehensive function to correct the entire text
209
  def paraphrase_and_correct(text):
210
- text = enhanced_spell_check(text)
211
- text = remove_redundant_words(text)
212
- text = capitalize_sentences_and_nouns(text)
213
- text = correct_tense_errors(text)
214
- text = ensure_subject_verb_agreement(text)
215
- text = enhance_punctuation(text)
216
- text = correct_apostrophes(text)
217
- text = correct_semantic_errors(text)
218
- text = rephrase_with_synonyms(text)
219
- return text
220
-
221
- # Gradio interface function
222
- def gradio_interface(text):
223
- corrected_text = paraphrase_and_correct(text)
224
- return corrected_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- # Setting up Gradio interface
227
- iface = gr.Interface(
228
- fn=gradio_interface,
229
- inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
230
- outputs=[gr.Textbox(label="Corrected Text")],
231
- title="Grammar & Semantic Error Correction",
232
- )
233
-
234
- # Run the Gradio interface
235
- if __name__ == "__main__":
236
- iface.launch()
 
2
import subprocess
import sys

import gradio as gr
import language_tool_python
import nltk
import spacy
from nltk.corpus import wordnet
from spellchecker import SpellChecker
from transformers import pipeline
 
10
 
11
# Initialize the English text classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Initialize the LanguageTool for grammar correction
tool = language_tool_python.LanguageTool('en-US')

# Ensure necessary NLTK data is downloaded (quiet to keep startup logs clean)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Ensure the SpaCy model is installed; download it on first run
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the current interpreter (sys.executable) so the model is installed
    # into the active environment; check=True surfaces download failures
    # instead of silently retrying the load.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
30
 
31
# Function to predict the label and score for English text (AI Detection)
def predict_en(text):
    """Run the AI-detection classifier on *text*.

    Returns the top prediction as a (label, score) pair.
    """
    prediction = pipeline_en(text)[0]
    return prediction['label'], prediction['score']
35
+
36
# Function to remove redundant and meaningless words
def remove_redundant_words(text):
    """Strip common filler words from *text*, preserving token order."""
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    doc = nlp(text)
    kept = [tok.text for tok in doc if tok.text.lower() not in fillers]
    return ' '.join(kept)
42
 
43
# Function to apply grammatical corrections using LanguageTool
def correct_grammar(text):
    """Return *text* with LanguageTool's suggested grammar fixes applied."""
    return tool.correct(text)
47
+
48
# Function to correct spelling errors
def correct_spelling(text):
    """Spell-correct each whitespace-separated word.

    Words the checker has no suggestion for are kept unchanged.
    """
    corrected = [spell.correction(word) or word for word in text.split()]
    return ' '.join(corrected)
56
+
57
# Function to capitalize the first letter of each sentence and proper nouns
def capitalize_sentences_and_nouns(text):
    """Capitalize sentence-initial words and proper nouns; leave all other
    tokens exactly as they appear."""
    doc = nlp(text)
    sentences = []

    for sent in doc.sents:
        tokens = [
            tok.text.capitalize()
            if tok.i == sent.start or tok.pos_ == "PROPN"
            else tok.text
            for tok in sent
        ]
        sentences.append(' '.join(tokens))

    return ' '.join(sentences)
74
 
75
# Function to rephrase with contextually appropriate synonyms
def rephrase_with_synonyms(text):
    """Replace nouns, verbs, adjectives and adverbs with their first WordNet
    synonym.

    Tokens with other parts of speech, or with no WordNet synsets for the
    matching POS, are kept as-is.
    """
    # Map spaCy coarse POS tags to the corresponding WordNet POS constants.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    rephrased_text = []

    for token in doc:
        pos_tag = pos_map.get(token.pos_)
        if pos_tag:
            synonyms = wordnet.synsets(token.text, pos=pos_tag)
            if synonyms:
                # First lemma of the first synset. WordNet joins multi-word
                # lemmas with underscores (e.g. "angle_worm"), so restore
                # normal spacing before inserting into the sentence.
                synonym = synonyms[0].lemmas()[0].name().replace('_', ' ')
                rephrased_text.append(synonym)
            else:
                rephrased_text.append(token.text)
        else:
            rephrased_text.append(token.text)

    return ' '.join(rephrased_text)
102
 
103
# Comprehensive function for paraphrasing and grammar correction
def paraphrase_and_correct(text):
    """Run the full cleanup pipeline over *text* and return the result.

    Order matters: fillers are removed first, then capitalization, a grammar
    pass, synonym rephrasing, spelling, and a final grammar pass to tidy up
    anything the rephrasing disturbed.
    """
    steps = (
        remove_redundant_words,        # Step 1: drop meaningless/filler words
        capitalize_sentences_and_nouns,  # Step 2: sentence + proper-noun caps
        correct_grammar,               # Step 3: LanguageTool grammar fixes
        rephrase_with_synonyms,        # Step 4: contextual synonym swaps
        correct_spelling,              # Step 5: spelling corrections
        correct_grammar,               # Step 6: final grammar pass
    )

    result = text
    for step in steps:
        result = step(result)
    return result
124
+
125
# Gradio app setup with two tabs
with gr.Blocks() as demo:
    with gr.Tab("AI Detection"):
        detect_input = gr.Textbox(lines=5, label='Text')
        detect_btn = gr.Button("🤖 Predict!")
        detect_label = gr.Textbox(lines=1, label='Predicted Label 🎃')
        detect_score = gr.Textbox(lines=1, label='Prob')

        # Classify the entered text when the button is pressed
        detect_btn.click(fn=predict_en, inputs=detect_input,
                         outputs=[detect_label, detect_score])

    with gr.Tab("Paraphrasing & Grammar Correction"):
        correct_input = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
        correct_btn = gr.Button("🔄 Paraphrase and Correct")
        correct_output = gr.Textbox(lines=5, label='Corrected Text')

        # Run the full correction pipeline when the button is pressed
        correct_btn.click(fn=paraphrase_and_correct, inputs=correct_input,
                          outputs=correct_output)

demo.launch(share=True)