# Source: Hugging Face Space by "sashtech" — app.py (commit fa69dbc, verified).
# (Web-page chrome from the original scrape — "raw / history / blame / 4.43 kB" —
# converted to this comment so the file is valid Python.)
# Added more redundant/filler words
def remove_redundant_words(text):
    """Remove filler words and filler phrases from *text*.

    Tokenizes with the module-level spaCy ``nlp`` pipeline and rejoins the
    surviving tokens with single spaces (original spacing is not preserved).

    Bug fixed: the old version kept multi-word fillers ("kind of", "sort of",
    "you know") in a single-token membership set, so they could never match a
    spaCy token and were dead entries. They are now matched as bigrams.
    """
    # Single-token fillers, compared case-insensitively.
    single_fillers = {
        "actually", "basically", "literally", "really", "very", "just",
        "quite", "rather", "simply", "that", "honestly", "seriously",
    }
    # Two-token fillers, matched against consecutive lowercased tokens.
    phrase_fillers = {("kind", "of"), ("sort", "of"), ("you", "know")}

    doc = nlp(text)
    tokens = [token.text for token in doc]
    lowered = [t.lower() for t in tokens]

    kept = []
    i = 0
    while i < len(tokens):
        # Drop a two-word filler in one step so neither half survives.
        if i + 1 < len(tokens) and (lowered[i], lowered[i + 1]) in phrase_fillers:
            i += 2
            continue
        if lowered[i] in single_fillers:
            i += 1
            continue
        kept.append(tokens[i])
        i += 1
    return ' '.join(kept)
# Capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Lower-case the text, then capitalize sentence starts and proper nouns.

    Uses the module-level spaCy ``nlp`` pipeline for sentence segmentation
    and POS tags; tokens are rejoined with single spaces.
    """
    doc = nlp(text)
    fixed_sentences = []
    for sent in doc.sents:
        words = [
            tok.text.capitalize()
            if (tok.i == sent.start or tok.pos_ == "PROPN")
            else tok.text.lower()
            for tok in sent
        ]
        fixed_sentences.append(' '.join(words))
    return ' '.join(fixed_sentences)
# Function to dynamically correct tenses and verb forms
def correct_tense_errors(text):
    """Reduce auxiliary verb tokens to their base form via WordNet's morphy.

    Only tokens tagged VERB with an auxiliary dependency ("aux"/"auxpass")
    are touched; everything else passes through unchanged. Tokens are
    rejoined with single spaces.
    """
    aux_deps = {"aux", "auxpass"}
    doc = nlp(text)
    out = []
    for tok in doc:
        if tok.pos_ == "VERB" and tok.dep_ in aux_deps:
            # morphy returns None when it has no base form; keep the token then.
            out.append(wordnet.morphy(tok.text, wordnet.VERB) or tok.text)
        else:
            out.append(tok.text)
    return ' '.join(out)
# Enhanced function to handle subject-verb agreement
def ensure_subject_verb_agreement(text):
    """Fix simple subject-verb number disagreement (singular/plural nouns).

    Bug fixed: the old version emitted the corrected *verb* form at the
    subject token's position, which deleted the subject word from the output
    and left the verb appearing twice. Corrections are now keyed by the
    verb's token index and substituted in place.
    """
    doc = nlp(text)
    # Map: verb token index -> corrected surface form.
    verb_fixes = {}
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            head = token.head
            if token.tag_ == "NN" and head.tag_ != "VBZ":
                # Singular subject but verb not 3rd-person singular: add "s".
                verb_fixes[head.i] = head.lemma_ + "s"
            elif token.tag_ == "NNS" and head.tag_ == "VBZ":
                # Plural subject but 3rd-person-singular verb: use the lemma.
                verb_fixes[head.i] = head.lemma_
    # Emit every token, substituting corrected verbs at their own positions.
    return ' '.join(verb_fixes.get(tok.i, tok.text) for tok in doc)
# Ensure proper apostrophe usage and possessives
def correct_apostrophes(text):
    """Fix common apostrophe mistakes without mangling ordinary plurals.

    Bugs fixed: the old first regex rewrote *every* word ending in "s" as a
    possessive ("dogs" -> "dog's"), corrupting plain plurals, and the second
    regex replaced "s'" with itself (a no-op). Both are replaced with
    targeted, safe corrections.
    """
    # Restore the apostrophe in frequent contractions typed without one.
    contractions = {
        "dont": "don't", "cant": "can't", "wont": "won't", "isnt": "isn't",
        "arent": "aren't", "doesnt": "doesn't", "didnt": "didn't",
        "wasnt": "wasn't", "werent": "weren't", "hasnt": "hasn't",
        "havent": "haven't", "wouldnt": "wouldn't", "couldnt": "couldn't",
        "shouldnt": "shouldn't",
    }
    for wrong, right in contractions.items():
        text = re.sub(r"\b{}\b".format(wrong), right, text)
    # Re-attach a possessive marker separated by whitespace: "dog 's" -> "dog's".
    text = re.sub(r"(\w)\s+'s\b", r"\1's", text)
    # Plural possessive with a stray space: "dogs '" -> "dogs'".
    text = re.sub(r"(\w)s\s+'(?!\w)", r"\1s'", text)
    return text
# Enhanced punctuation
def enhance_punctuation(text):
    """Normalize spacing around punctuation marks.

    Bug fixed: the old second regex inserted a space after *any* punctuation
    followed by a non-space, splitting numbers ("3,000" -> "3, 000",
    "3.5" -> "3. 5") and punctuation runs ("..." -> ". . ."). The insertion
    now skips digits and adjacent punctuation.
    """
    # Remove stray whitespace before punctuation: "word ," -> "word,".
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)
    # Add a space after punctuation when followed by a letter-like character,
    # but not inside numbers or between consecutive punctuation marks.
    text = re.sub(r'([?.!,";:])(?=[^\s\d?.!,";:])', r'\1 ', text)
    return text
# Paraphrasing using synonyms and correcting semantic errors
def rephrase_with_synonyms(text):
    """Replace content words with their first WordNet synonym, when one exists.

    Only NOUN/VERB/ADJ/ADV tokens are candidates. For verbs, a naive suffix
    is appended to roughly match the original tense (VBG -> -ing,
    VBD/VBN -> -ed, VBZ -> -s). Tokens are rejoined with single spaces.
    """
    content_pos = {"NOUN", "VERB", "ADJ", "ADV"}
    # Naive tense restoration for verb synonyms (no consonant doubling etc.).
    verb_suffix = {"VBG": "ing", "VBD": "ed", "VBN": "ed", "VBZ": "s"}

    doc = nlp(text)
    rephrased = []
    for tok in doc:
        replacement = tok.text
        if tok.pos_ in content_pos:
            # wordnet.NOUN/VERB/ADJ/ADV share names with spaCy's coarse tags.
            wn_pos = getattr(wordnet, tok.pos_)
            candidates = get_synonyms_nltk(tok.lemma_, wn_pos)
            if candidates:
                replacement = candidates[0]
                if tok.pos_ == "VERB":
                    replacement += verb_suffix.get(tok.tag_, "")
        rephrased.append(replacement)
    return ' '.join(rephrased)
# Comprehensive text correction
def paraphrase_and_correct(text):
    """Run the full correction pipeline over *text*, stage by stage.

    Stage order matters: spelling first, synonym substitution late, and
    subject-verb agreement last, operating on the already-rewritten text.
    """
    pipeline = (
        enhanced_spell_check,
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        correct_apostrophes,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    for stage in pipeline:
        text = stage(text)
    return text
# Integrate with Gradio UI
def gradio_interface(text):
    """Gradio callback: run the full correction pipeline on the input text."""
    return paraphrase_and_correct(text)
# Wire the correction pipeline into a simple one-in/one-out Gradio web UI.
_input_box = gr.Textbox(lines=5, placeholder="Enter text here...")
_output_box = gr.Textbox(label="Corrected Text")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_input_box,
    outputs=[_output_box],
    title="Grammar & Semantic Error Correction",
)

# Launch the server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()