Spaces:
Sleeping
Sleeping
File size: 4,425 Bytes
fa69dbc 486bbd6 cf3f184 fa69dbc cf3f184 fa69dbc cf3f184 42515fd cf3f184 fa69dbc cf3f184 fa69dbc cf3f184 fa69dbc cf3f184 42515fd cf3f184 42515fd cf3f184 42515fd cf3f184 486bbd6 fa69dbc 42515fd fa69dbc cf3f184 fa69dbc cf3f184 fa69dbc 486bbd6 cf3f184 42515fd fa69dbc 486bbd6 cf3f184 42515fd cf3f184 42515fd cf3f184 42515fd cf3f184 42515fd cf3f184 486bbd6 fa69dbc 353216c 42515fd 353216c fa69dbc cf3f184 fa69dbc a2b6ad0 fa69dbc a2b6ad0 cf3f184 42515fd fa69dbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# Added more redundant/filler words
def remove_redundant_words(text):
    """Strip common filler words and phrases from *text*.

    Fix: multi-word fillers ("kind of", "sort of", "you know") can never
    match a single spaCy token, so they are removed with a regex pass
    before tokenization; single-word fillers are filtered token-by-token.
    """
    # Multi-word phrases must be handled on the raw string — the spaCy
    # tokenizer splits them, so a per-token set lookup never matches them.
    for phrase in ("kind of", "sort of", "you know"):
        text = re.sub(r"\b" + re.escape(phrase) + r"\b", "", text, flags=re.IGNORECASE)
    single_word_fillers = {"actually", "basically", "literally", "really", "very", "just", "quite", "rather", "simply", "that", "honestly", "seriously"}
    doc = nlp(text)
    filtered_text = [token.text for token in doc if token.text.lower() not in single_word_fillers]
    return ' '.join(filtered_text)
# Capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Lower-case every token, then capitalize sentence-initial tokens and proper nouns."""
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        words = [
            tok.text.capitalize() if (tok.i == sent.start or tok.pos_ == "PROPN") else tok.text.lower()
            for tok in sent
        ]
        sentences.append(' '.join(words))
    return ' '.join(sentences)
# Function to dynamically correct tenses and verb forms
def correct_tense_errors(text):
    """Reduce auxiliary verbs to their WordNet base form; leave other tokens untouched."""
    doc = nlp(text)
    out = []
    for tok in doc:
        # NOTE(review): spaCy usually tags auxiliaries with pos_ == "AUX",
        # so this VERB+aux condition may rarely fire — confirm with the model in use.
        if tok.pos_ == "VERB" and tok.dep_ in {"aux", "auxpass"}:
            base = wordnet.morphy(tok.text, wordnet.VERB)
            out.append(base if base else tok.text)
        else:
            out.append(tok.text)
    return ' '.join(out)
# Enhanced function to handle subject-verb agreement
def ensure_subject_verb_agreement(text):
    """Fix simple subject-verb agreement (singular noun -> verb+s, plural noun -> bare verb).

    Fix: the previous version emitted the corrected *verb* in the
    subject's position, which dropped the subject word and duplicated the
    verb. Corrections are now recorded against the verb's own index and
    every token is emitted exactly once, in order.
    """
    doc = nlp(text)
    # Map: verb token index -> corrected surface form.
    verb_fixes = {}
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                # Singular subject but verb not 3rd-person singular.
                verb_fixes[token.head.i] = token.head.lemma_ + "s"
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                # Plural subject but 3rd-person-singular verb.
                verb_fixes[token.head.i] = token.head.lemma_
    corrected_text = [verb_fixes.get(token.i, token.text) for token in doc]
    return ' '.join(corrected_text)
# Ensure proper apostrophe usage and possessives
def correct_apostrophes(text):
    """Normalize whitespace around possessive apostrophes.

    Fix: the old first regex's lookbehind ``(?<!\'s)`` was always true
    (an apostrophe is not a ``\\w`` character), so EVERY word ending in
    "s" was turned into a possessive ("cats" -> "cat's"); the old second
    regex was an identity replacement. This version only repairs an
    apostrophe that is already present but separated by whitespace.
    """
    # "dog 's" -> "dog's"
    text = re.sub(r"(\w)\s+'s\b", r"\1's", text)
    # "dogs ' " / "dogs '" at end -> "dogs'" (plural possessive)
    text = re.sub(r"(\w)s\s+'(?=\s|$)", r"\1s'", text)
    return text
# Enhanced punctuation
def enhance_punctuation(text):
    """Tighten spacing around punctuation.

    Fix: the old "add space after punctuation" regex matched any
    non-space follower, mangling decimals ("3.14" -> "3. 14") and
    thousand separators ("1,000" -> "1, 000"); a digit follower is now
    excluded.
    """
    # Remove whitespace before punctuation: "word ," -> "word,"
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)
    # Add a space after punctuation glued to a word, but never split numbers.
    text = re.sub(r'([?.!,";:])(?=[^\s\d])', r'\1 ', text)
    return text
# Paraphrasing using synonyms and correcting semantic errors
def rephrase_with_synonyms(text):
    """Swap content words (noun/verb/adj/adv) for their first WordNet synonym.

    Verbs get a crude surface re-inflection based on the original
    fine-grained tag (VBG -> +ing, VBD/VBN -> +ed, VBZ -> +s).
    """
    verb_suffix = {"VBG": "ing", "VBD": "ed", "VBN": "ed", "VBZ": "s"}
    doc = nlp(text)
    rephrased = []
    for tok in doc:
        replacement = tok.text
        if tok.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
            # wordnet.NOUN/VERB/ADJ/ADV are the corresponding WordNet POS codes.
            wn_pos = getattr(wordnet, tok.pos_)
            if wn_pos:
                candidates = get_synonyms_nltk(tok.lemma_, wn_pos)
                if candidates:
                    replacement = candidates[0]
                    if tok.pos_ == "VERB":
                        replacement += verb_suffix.get(tok.tag_, "")
        rephrased.append(replacement)
    return ' '.join(rephrased)
# Comprehensive text correction
def paraphrase_and_correct(text):
    """Run every correction/paraphrasing pass over *text*, in fixed order."""
    pipeline = (
        enhanced_spell_check,
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        correct_apostrophes,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    for step in pipeline:
        text = step(text)
    return text
# Integrate with Gradio UI
def gradio_interface(text):
    """Gradio callback: run the full correction pipeline on the user's input."""
    return paraphrase_and_correct(text)
# Single-textbox app: user enters raw text, gets the corrected version back.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Corrected Text")],
    title="Grammar & Semantic Error Correction",
)
# Launch only when run as a script (Spaces executes this module directly).
if __name__ == "__main__":
    iface.launch()
|