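"""Gradio app for grammar and semantic error correction.

Pipeline (see paraphrase_and_correct): spell check -> filler-word removal ->
capitalization -> tense and subject-verb agreement fixes -> punctuation and
apostrophe cleanup -> semantic word corrections -> synonym-based rephrasing.
"""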
import gradio as gr
import spacy
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker
import re
import inflect

# Initialize components: load the spaCy model, downloading it on first run
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Initialize the spell checker
spell = SpellChecker()

# Initialize the inflect engine for pluralization
inflect_engine = inflect.engine()

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Function to remove redundant/filler words
def remove_redundant_words(text):
    # Strip multi-word fillers first; the token-level filter below only sees single tokens
    for phrase in ("kind of", "sort of", "you know"):
        text = re.sub(r'\b' + phrase + r'\b', '', text, flags=re.IGNORECASE)
    doc = nlp(text)
    meaningless_words = {"actually", "basically", "literally", "really", "very", "just",
                         "quite", "rather", "simply", "that", "honestly", "seriously"}
    filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
    return ' '.join(filtered_text)

# Function to capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                sentence.append(token.text.capitalize())
            else:
                sentence.append(token.text.lower())
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)

# Function to correct verb tenses by normalizing auxiliaries to their base form
def correct_tense_errors(text):
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # spaCy tags auxiliaries as AUX rather than VERB, so check both POS values
        if token.pos_ in {"VERB", "AUX"} and token.dep_ in {"aux", "auxpass"}:
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)

# Function to ensure subject-verb agreement
def ensure_subject_verb_agreement(text):
    doc = nlp(text)
    # Collect corrections keyed by the verb's token index, so each token is
    # emitted exactly once (replacing the verb in place rather than dropping
    # the subject and duplicating the verb)
    corrections = {}
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                corrections[token.head.i] = token.head.lemma_ + "s"  # singular subject -> 3rd-person singular verb
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                corrections[token.head.i] = token.head.lemma_        # plural subject -> base verb form
    return ' '.join(corrections.get(token.i, token.text) for token in doc)

# Function to correct apostrophe usage. Deciding plural vs. possessive needs
# context, so only repair spacing around apostrophes rather than inserting them
def correct_apostrophes(text):
    text = re.sub(r"\b(\w+)\s+'\s*s\b", r"\1's", text)  # Rejoin split possessives: "dog ' s" -> "dog's"
    text = re.sub(r"\b(\w+s)\s+'(\s)", r"\1'\2", text)  # Rejoin split plural possessives: "dogs ' " -> "dogs' "
    return text

# Function to enhance punctuation usage
def enhance_punctuation(text):
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)      # Remove extra space before punctuation
    text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)  # Add space after punctuation if needed
    text = re.sub(r'\s{2,}', ' ', text).strip()        # Collapse repeated whitespace
    # Capitalize the first word after sentence-ending punctuation
    text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
    return text

# Function to correct semantic errors and replace with more appropriate words
def correct_semantic_errors(text):
    semantic_corrections = {
        "animate_being": "animal",
        "little": "smallest",
        "big": "largest",
        "mammalian": "mammals",
        "universe": "world",
        "manner": "ways",
        "continue": "preserve",
        "dirt": "soil",
        "wellness": "health",
        "modulate": "regulate",
        "clime": "climate",
        "function": "role",
        "keeping": "maintaining",
        "lend": "contribute",
        "better": "improve",
        "cardinal": "key",
        "expeditiously": "efficiently",
        "marauder": "predator",
        "quarry": "prey",
        "forestalling": "preventing",
        "bend": "turn",
        "works": "plant",
        "croping": "grazing",
        "flora": "vegetation",
        "dynamical": "dynamic",
        "alteration": "change",
        "add-on": "addition",
        "indispensable": "essential",
        "nutrient": "food",
        "harvest": "crops",
        "pollenateing": "pollinating",
        "divers": "diverse",
        "beginning": "source",
        "homo": "humans",
        "fall_in": "collapse",
        "takeing": "leading",
        "coinage": "species",
        "trust": "rely",
        "angleworm": "earthworm",
        "interrupt": "break",
        "affair": "matter",
        "air_out": "aerate",
        "alimentary": "nutrient",
        "distributeed": "spread",
        "country": "areas",
        "reconstruct": "restore",
        "debauched": "degraded",
        "giant": "whales",
        "organic_structure": "bodies",
        "decease": "die",
        "carcase": "carcasses",
        "pin_downing": "trapping",
        "cut_downs": "reduces",
        "ambiance": "atmosphere",
        "extenuateing": "mitigating",
        "decision": "conclusion",
        "doing": "making",
        "prolongs": "sustains",
        "home_ground": "habitats",
        "continueing": "preserving",
        "populateing": "living",
        "beingness": "beings"
    }
    words = text.split()
    corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
    return ' '.join(corrected_words)

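# Helper used by rephrase_with_synonyms below. A minimal WordNet-based sketch
# (assumed implementation; the name is referenced but not defined in the file):
# return the lemma names of the first synset for the given word and POS,
# excluding the word itself.
def get_synonyms_nltk(word, pos):
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name() for lemma in synsets[0].lemmas() if lemma.name().lower() != word.lower()]
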
# Function to rephrase using synonyms and adjust verb forms
def rephrase_with_synonyms(text):
    doc = nlp(text)
    rephrased_text = []
    for token in doc:
        pos_tag = None
        if token.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
            pos_tag = getattr(wordnet, token.pos_)  # maps to wordnet.NOUN / VERB / ADJ / ADV
        if pos_tag:
            synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
            if synonyms:
                synonym = synonyms[0]
                # Naive re-inflection: append a suffix to match the original verb form
                if token.pos_ == "VERB":
                    if token.tag_ == "VBG":
                        synonym = synonym + 'ing'
                    elif token.tag_ in ["VBD", "VBN"]:
                        synonym = synonym + 'ed'
                    elif token.tag_ == "VBZ":
                        synonym = synonym + 's'
                rephrased_text.append(synonym)
            else:
                rephrased_text.append(token.text)
        else:
            rephrased_text.append(token.text)
    return ' '.join(rephrased_text)

# Function to apply enhanced spell check
def enhanced_spell_check(text):
    words = text.split()
    corrected_words = []
    for word in words:
        if '_' in word:
            # Correct each part of underscore-joined compounds separately
            sub_words = word.split('_')
            corrected_sub_words = [spell.correction(w) or w for w in sub_words]
            corrected_words.append('_'.join(corrected_sub_words))
        else:
            corrected_word = spell.correction(word) or word
            corrected_words.append(corrected_word)
    return ' '.join(corrected_words)

# Comprehensive function to correct the entire text
def paraphrase_and_correct(text):
    text = enhanced_spell_check(text)
    text = remove_redundant_words(text)
    text = capitalize_sentences_and_nouns(text)
    text = correct_tense_errors(text)
    text = ensure_subject_verb_agreement(text)
    text = enhance_punctuation(text)
    text = correct_apostrophes(text)
    text = correct_semantic_errors(text)
    text = rephrase_with_synonyms(text)
    return text

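# Illustrative call (input only; actual output depends on the loaded models):
#   paraphrase_and_correct("the worlds little mammalian continue to better dirt wellness")
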
# Gradio interface function
def gradio_interface(text):
    corrected_text = paraphrase_and_correct(text)
    return corrected_text

# Setting up the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Corrected Text")],
    title="Grammar & Semantic Error Correction",
)

# Run the Gradio interface
if __name__ == "__main__":
    iface.launch()