# Source: Hugging Face file view of app.py (uploader: sashtech; commit 04919b2 "Update app.py", verified; 8.12 kB).
import os
import gradio as gr
from transformers import pipeline
import spacy
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker
import re
import inflect
# Initialize components
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model could not be loaded; download it once and retry.
    print("Downloading spaCy model...")
    # Import the downloader explicitly rather than relying on `spacy.cli`
    # having been bound as a side effect of `import spacy`.
    from spacy.cli import download as _download_spacy_model

    _download_spacy_model("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Initialize the spell checker
spell = SpellChecker()

# Initialize the inflect engine for pluralization
inflect_engine = inflect.engine()

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)
# Function to remove redundant/filler words
def remove_redundant_words(text):
    """Drop filler words and filler phrases from *text*.

    The text is tokenized with the module-level spaCy pipeline ``nlp``.
    Single-token fillers are matched case-insensitively against a fixed set;
    multi-word fillers ("kind of", "sort of", "you know") are matched as
    consecutive tokens, because a phrase can never equal a single token.

    Returns the surviving token texts joined with single spaces.
    """
    filler_words = {
        "actually", "basically", "literally", "really", "very", "just",
        "quite", "rather", "simply", "that", "honestly", "seriously",
    }
    filler_phrases = (("kind", "of"), ("sort", "of"), ("you", "know"))

    doc = nlp(text)
    lowered = [token.text.lower() for token in doc]
    kept = []
    index = 0
    while index < len(lowered):
        # Try the multi-token fillers first so "kind of" is removed as a unit.
        phrase = next(
            (p for p in filler_phrases
             if tuple(lowered[index:index + len(p)]) == p),
            None,
        )
        if phrase is not None:
            index += len(phrase)
            continue
        if lowered[index] not in filler_words:
            kept.append(doc[index].text)
        index += 1
    return ' '.join(kept)
# Function to capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Normalize letter case sentence by sentence.

    The first token of every spaCy sentence and every token tagged ``PROPN``
    is passed through ``str.capitalize``; the pronoun "I" is kept uppercase;
    every other token is lowercased. Tokens are joined with single spaces,
    and sentences are joined with single spaces as well.
    """
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        words = []
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                words.append(token.text.capitalize())
            elif token.lower_ == "i":
                # Lowercasing the first-person pronoun would introduce an
                # error instead of fixing one.
                words.append("I")
            else:
                words.append(token.text.lower())
        sentences.append(' '.join(words))
    return ' '.join(sentences)
# Function to correct verb tenses
def correct_tense_errors(text):
    """Replace auxiliary verbs with their WordNet base form.

    A token is rewritten only when spaCy tags it ``VERB`` and its dependency
    label is ``aux`` or ``auxpass``; the replacement is
    ``wordnet.morphy(token.text, wordnet.VERB)``, falling back to the
    original text when morphy returns nothing. All other tokens are kept
    verbatim. The result is the token texts joined with single spaces.
    """
    auxiliary_deps = {"aux", "auxpass"}

    def _render(token):
        if token.pos_ != "VERB" or token.dep_ not in auxiliary_deps:
            return token.text
        return wordnet.morphy(token.text, wordnet.VERB) or token.text

    return ' '.join(_render(token) for token in nlp(text))
# Helper: third-person-singular present form of a verb lemma
def _third_person_singular(lemma):
    """Return the third-person-singular present form of *lemma* (run -> runs)."""
    irregular = {"be": "is", "have": "has"}
    if lemma in irregular:
        return irregular[lemma]
    if lemma.endswith(("s", "x", "z", "ch", "sh", "o")):
        return lemma + "es"
    if len(lemma) > 1 and lemma.endswith("y") and lemma[-2] not in "aeiou":
        return lemma[:-1] + "ies"
    return lemma + "s"


# Function to ensure subject-verb agreement
def ensure_subject_verb_agreement(text):
    """Make present-tense verbs agree in number with their noun subject.

    For every token whose dependency is ``nsubj`` and whose head is tagged
    ``VERB``:

    * a singular noun subject (``NN``) with a non-third-person present verb
      (``VBP``) gets the third-person-singular verb form;
    * a plural noun subject (``NNS``) with a third-person-singular verb
      (``VBZ``) gets the verb's base form.

    The correction is written at the *verb's* position, so the subject token
    is preserved and the verb is not duplicated. Past-tense verbs and
    participles are left untouched so tense is never altered, and subjects
    with coordinated conjuncts are skipped because their number is ambiguous.

    Returns the token texts joined with single spaces.
    """
    doc = nlp(text)
    replacements = {}  # verb token index -> corrected surface form
    for token in doc:
        if token.dep_ != "nsubj" or token.head.pos_ != "VERB":
            continue
        if any(child.dep_ == "conj" for child in token.children):
            continue
        verb = token.head
        if token.tag_ == "NN" and verb.tag_ == "VBP":
            replacements[verb.i] = _third_person_singular(verb.lemma_)
        elif token.tag_ == "NNS" and verb.tag_ == "VBZ":
            replacements[verb.i] = "are" if verb.lemma_ == "be" else verb.lemma_
    return ' '.join(replacements.get(token.i, token.text) for token in doc)
# Clitics ('s, 're, n't, ...) that token-wise joining leaves detached from
# the preceding word, e.g. "dog 's" or "do n't".
_SPLIT_CLITIC_RE = re.compile(
    r"(\w)\s+((?:'(?:s|re|ve|ll|d|m)|n't)\b)", re.IGNORECASE
)

# Contractions that are unambiguous when written without their apostrophe.
# Ambiguous spellings such as "its", "were", "well", "wont" or "cant" are
# deliberately excluded because they are also ordinary words.
_BARE_CONTRACTIONS = {
    "dont": "don't",
    "doesnt": "doesn't",
    "didnt": "didn't",
    "isnt": "isn't",
    "arent": "aren't",
    "wasnt": "wasn't",
    "werent": "weren't",
    "hasnt": "hasn't",
    "havent": "haven't",
    "hadnt": "hadn't",
    "couldnt": "couldn't",
    "shouldnt": "shouldn't",
    "wouldnt": "wouldn't",
}
_BARE_CONTRACTION_RE = re.compile(
    r"\b(?:" + "|".join(map(re.escape, _BARE_CONTRACTIONS)) + r")\b",
    re.IGNORECASE,
)


def _restore_contraction(match):
    """Return the apostrophised form of a matched bare contraction."""
    word = match.group(0)
    fixed = _BARE_CONTRACTIONS[word.lower()]
    # Keep a leading capital ("Dont" -> "Don't").
    return fixed[:1].upper() + fixed[1:] if word[:1].isupper() else fixed


# Function to correct apostrophe usage
def correct_apostrophes(text):
    """Repair apostrophe usage without inventing possessives.

    Two conservative fixes are applied:

    1. Clitics separated from their host word by whitespace are re-attached
       ("dog 's" -> "dog's", "do n't" -> "don't").
    2. Unambiguous contractions missing their apostrophe are restored
       ("dont" -> "don't").

    Ordinary words ending in "s" (plurals, "this", "is", ...) are left
    unchanged.
    """
    text = _SPLIT_CLITIC_RE.sub(r"\1\2", text)
    return _BARE_CONTRACTION_RE.sub(_restore_contraction, text)
# Whitespace that precedes closing punctuation ("word ," -> "word,").
_SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+([?.!,;:])")
# Punctuation glued to a following letter ("a,b" -> "a, b"). Digits are
# excluded so decimals ("3.14") and times ("10:30") stay intact.
_NO_SPACE_AFTER_PUNCT_RE = re.compile(r"([?.!,;:])(?=[^\W\d_])")
# A lowercase letter that starts a new sentence.
_SENTENCE_START_RE = re.compile(r"([.!?])\s+([a-z])")


def _tidy_double_quotes(text):
    """Normalize spacing around balanced double quotes.

    Quotes are paired in order of appearance: whitespace just inside each
    pair is removed and a single space separates the quotation from
    neighbouring words. Text with an odd number of quote marks is returned
    unchanged because opening and closing marks cannot be told apart.
    """
    segments = text.split('"')
    if len(segments) < 3 or len(segments) % 2 == 0:
        return text
    last = len(segments) - 1
    pieces = []
    for index, segment in enumerate(segments):
        core = segment.strip()
        if index % 2:
            # Odd segments are the quoted passages themselves.
            pieces.append('"' + core + '"')
        elif not core:
            # Keep one space between two adjacent quotations.
            pieces.append(' ' if 0 < index < last else '')
        else:
            lead = ' ' if index > 0 and core[0] not in '?.!,;:)]}' else ''
            trail = ' ' if index < last and core[-1] not in '([{' else ''
            pieces.append(lead + core + trail)
    return ''.join(pieces)


# Function to enhance punctuation usage
def enhance_punctuation(text):
    """Normalize spacing and capitalization around punctuation.

    Steps, in order:

    1. remove whitespace before ``? . ! , ; :``;
    2. insert a space after those marks when a letter follows directly;
    3. tidy spacing around balanced double quotes and strip the ends;
    4. capitalize a lowercase letter that follows ``. ! ?`` and whitespace,
       collapsing that whitespace to one space.

    No period is inserted before capitalized words, so proper nouns in the
    middle of a sentence are left alone.
    """
    text = _SPACE_BEFORE_PUNCT_RE.sub(r"\1", text)
    text = _NO_SPACE_AFTER_PUNCT_RE.sub(r"\1 ", text)
    text = _tidy_double_quotes(text).strip()
    return _SENTENCE_START_RE.sub(
        lambda m: m.group(1) + ' ' + m.group(2).upper(), text
    )
# Fixed word-for-word replacement table, built once at import time.
_SEMANTIC_CORRECTIONS = {
    "animate_being": "animal",
    "little": "smallest",
    "big": "largest",
    "mammalian": "mammals",
    "universe": "world",
    "manner": "ways",
    "continue": "preserve",
    "dirt": "soil",
    "wellness": "health",
    "modulate": "regulate",
    "clime": "climate",
    "function": "role",
    "keeping": "maintaining",
    "lend": "contribute",
    "better": "improve",
    "cardinal": "key",
    "expeditiously": "efficiently",
    "marauder": "predator",
    "quarry": "prey",
    "forestalling": "preventing",
    "bend": "turn",
    "works": "plant",
    "croping": "grazing",
    "flora": "vegetation",
    "dynamical": "dynamic",
    "alteration": "change",
    "add-on": "addition",
    "indispensable": "essential",
    "nutrient": "food",
    "harvest": "crops",
    "pollenateing": "pollinating",
    "divers": "diverse",
    "beginning": "source",
    "homo": "humans",
    "fall_in": "collapse",
    "takeing": "leading",
    "coinage": "species",
    "trust": "rely",
    "angleworm": "earthworm",
    "interrupt": "break",
    "affair": "matter",
    "air_out": "aerate",
    "alimentary": "nutrient",
    "distributeed": "spread",
    "country": "areas",
    "reconstruct": "restore",
    "debauched": "degraded",
    "giant": "whales",
    "organic_structure": "bodies",
    "decease": "die",
    "carcase": "carcasses",
    "pin_downing": "trapping",
    "cut_downs": "reduces",
    "ambiance": "atmosphere",
    "extenuateing": "mitigating",
    "decision": "conclusion",
    "doing": "making",
    "prolongs": "sustains",
    "home_ground": "habitats",
    "continueing": "preserving",
    "populateing": "living",
    "beingness": "beings",
}

# Splits one whitespace-delimited word into leading punctuation, the word
# proper, and trailing punctuation.
_WORD_WITH_PUNCT_RE = re.compile(r"^(\W*)(.*?)(\W*)$")


# Function to correct semantic errors and replace with more appropriate words
def correct_semantic_errors(text):
    """Replace known awkward words using a fixed substitution table.

    *text* is split on whitespace. For each word, surrounding punctuation is
    set aside, the remaining core is looked up case-insensitively in the
    table, and on a hit the replacement is emitted with the original
    punctuation re-attached. A leading capital on the original word is
    carried over to the replacement. Words without a table entry are kept
    verbatim.

    Returns the words joined with single spaces.
    """
    corrected = []
    for word in text.split():
        lead, core, trail = _WORD_WITH_PUNCT_RE.match(word).groups()
        replacement = _SEMANTIC_CORRECTIONS.get(core.lower())
        if replacement is None:
            corrected.append(word)
            continue
        if core[:1].isupper():
            replacement = replacement[:1].upper() + replacement[1:]
        corrected.append(lead + replacement + trail)
    return ' '.join(corrected)
# Helper: WordNet synonym lookup used by rephrase_with_synonyms
def get_synonyms_nltk(word, pos):
    """Return single-word synonyms of *word* from its first WordNet synset.

    *pos* is a WordNet part-of-speech constant (e.g. ``wordnet.VERB``).
    The word itself is excluded so a returned synonym is always a real
    change, and multi-word lemmas (which contain ``_``) are skipped because
    they cannot be inflected by simple suffix rules. Returns an empty list
    when WordNet has no synset for the word.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    target = word.lower()
    names = (lemma.name() for lemma in synsets[0].lemmas())
    return [name for name in names if "_" not in name and name.lower() != target]


# Helper: re-apply the original token's inflection to a base-form synonym
def _inflect_synonym(base, tag):
    """Inflect *base* to match the fine-grained tag *tag* of the replaced token."""
    consonant_y = len(base) > 1 and base.endswith("y") and base[-2] not in "aeiou"
    if tag == "VBG":
        if base.endswith("ie"):
            return base[:-2] + "ying"
        if len(base) > 2 and base.endswith("e") and not base.endswith(("ee", "oe", "ye")):
            return base[:-1] + "ing"  # drop the silent "e": make -> making
        return base + "ing"
    if tag in ("VBD", "VBN"):
        if base.endswith("e"):
            return base + "d"
        if consonant_y:
            return base[:-1] + "ied"
        return base + "ed"
    if tag == "VBZ":
        if consonant_y:
            return base[:-1] + "ies"
        if base.endswith(("s", "x", "z", "ch", "sh", "o")):
            return base + "es"
        return base + "s"
    if tag == "NNS":
        return inflect_engine.plural(base)
    return base


# Function to rephrase using synonyms and adjust verb forms
def rephrase_with_synonyms(text):
    """Swap content words for a WordNet synonym, keeping their inflection.

    Tokens tagged NOUN, VERB, ADJ or ADV are looked up by lemma via
    ``get_synonyms_nltk``; when a synonym exists, the first one replaces the
    token after being inflected to match the token's tag (``VBG``,
    ``VBD``/``VBN``, ``VBZ``, ``NNS``). A leading capital on the original
    token is preserved. All other tokens are kept verbatim.

    Returns the resulting words joined with single spaces.
    """
    wordnet_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    rephrased = []
    for token in nlp(text):
        pos_tag = wordnet_pos.get(token.pos_)
        synonyms = get_synonyms_nltk(token.lemma_, pos_tag) if pos_tag else []
        if not synonyms:
            rephrased.append(token.text)
            continue
        synonym = _inflect_synonym(synonyms[0], token.tag_)
        if token.text[:1].isupper():
            synonym = synonym[:1].upper() + synonym[1:]
        rephrased.append(synonym)
    return ' '.join(rephrased)
# Function to apply enhanced spell check
def enhanced_spell_check(text):
    """Spell-check every whitespace-separated word of *text*.

    Each word is split on ``_`` and every fragment is passed to the
    module-level ``spell.correction``; a fragment is kept as-is when no
    correction is returned. Fragments are re-joined with ``_`` and words
    with single spaces.
    """
    def _fix(fragment):
        return spell.correction(fragment) or fragment

    # A word without "_" splits into a single fragment, so one expression
    # covers both the compound and the plain case.
    return ' '.join(
        '_'.join(_fix(fragment) for fragment in word.split('_'))
        for word in text.split()
    )
# Comprehensive function to correct the entire text
def paraphrase_and_correct(text):
    """Run *text* through the full correction pipeline and return the result.

    The stages are applied strictly in the order listed below; each stage
    receives the output of the previous one.
    """
    stages = (
        enhanced_spell_check,
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        ensure_subject_verb_agreement,
        enhance_punctuation,
        correct_apostrophes,
        correct_semantic_errors,
        rephrase_with_synonyms,
    )
    for stage in stages:
        text = stage(text)
    return text
# Gradio interface function
def gradio_interface(text):
    """Gradio callback: return the corrected version of *text*."""
    return paraphrase_and_correct(text)
# Build the Gradio UI: one multi-line input box and one output box, both
# wired to gradio_interface.
iface = gr.Interface(
    title="Grammar & Semantic Error Correction",
    fn=gradio_interface,
    inputs=gr.Textbox(placeholder="Enter text here...", lines=5),
    outputs=[gr.Textbox(label="Corrected Text")],
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()