File size: 4,425 Bytes
fa69dbc
486bbd6
cf3f184
fa69dbc
cf3f184
 
 
fa69dbc
cf3f184
 
 
 
 
 
42515fd
cf3f184
 
fa69dbc
cf3f184
 
 
fa69dbc
cf3f184
 
 
 
 
 
 
 
 
 
 
fa69dbc
cf3f184
 
 
 
 
42515fd
cf3f184
42515fd
cf3f184
42515fd
 
 
 
cf3f184
486bbd6
fa69dbc
42515fd
fa69dbc
 
cf3f184
 
fa69dbc
 
 
 
cf3f184
 
fa69dbc
486bbd6
 
 
 
 
cf3f184
42515fd
 
fa69dbc
486bbd6
 
cf3f184
42515fd
cf3f184
42515fd
cf3f184
42515fd
cf3f184
42515fd
cf3f184
 
 
 
486bbd6
 
 
 
 
fa69dbc
353216c
 
 
 
 
 
 
 
42515fd
353216c
 
 
 
 
fa69dbc
cf3f184
 
fa69dbc
a2b6ad0
 
 
 
fa69dbc
 
a2b6ad0
cf3f184
42515fd
fa69dbc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Filler words that add no meaning to a sentence.
_FILLER_SINGLE = {
    "actually", "basically", "literally", "really", "very", "just",
    "quite", "rather", "simply", "that", "honestly", "seriously",
}
# Multi-word fillers must be matched across adjacent tokens: spaCy
# tokenizes "kind of" into two separate tokens, so a single-token
# membership test (the original approach) could never remove them.
_FILLER_PAIRS = {("kind", "of"), ("sort", "of"), ("you", "know")}

def remove_redundant_words(text):
    """Strip common filler words and two-word filler phrases from *text*.

    Tokenizes with spaCy, drops single-token fillers and adjacent
    two-token filler phrases (case-insensitively), and rejoins the
    remaining tokens with single spaces.
    """
    tokens = [token.text for token in nlp(text)]
    kept = []
    i = 0
    while i < len(tokens):
        if (i + 1 < len(tokens)
                and (tokens[i].lower(), tokens[i + 1].lower()) in _FILLER_PAIRS):
            i += 2  # skip the whole two-word filler phrase
            continue
        if tokens[i].lower() in _FILLER_SINGLE:
            i += 1
            continue
        kept.append(tokens[i])
        i += 1
    return ' '.join(kept)

# Capitalize sentence-initial words and proper nouns
def capitalize_sentences_and_nouns(text):
    """Return *text* with the first token of each sentence and every
    proper noun capitalized; all remaining tokens are lowercased.

    Tokens are rejoined with single spaces, so original whitespace
    is not preserved.
    """
    doc = nlp(text)
    sentences = []
    for sent in doc.sents:
        words = [
            tok.text.capitalize()
            if tok.i == sent.start or tok.pos_ == "PROPN"
            else tok.text.lower()
            for tok in sent
        ]
        sentences.append(' '.join(words))
    return ' '.join(sentences)

# Normalize auxiliary verb forms via WordNet lemmatization
def correct_tense_errors(text):
    """Rewrite auxiliary verbs to their base (lemma) form.

    Only tokens tagged VERB with dependency 'aux' or 'auxpass' are
    rewritten (falling back to the surface form when WordNet has no
    lemma); every other token passes through unchanged.
    """
    words = []
    for tok in nlp(text):
        is_aux_verb = tok.pos_ == "VERB" and tok.dep_ in {"aux", "auxpass"}
        if is_aux_verb:
            words.append(wordnet.morphy(tok.text, wordnet.VERB) or tok.text)
        else:
            words.append(tok.text)
    return ' '.join(words)

# Enhanced function to handle subject-verb agreement
def ensure_subject_verb_agreement(text):
    """Fix simple singular/plural subject-verb agreement errors.

    For each nominal subject (dep 'nsubj') whose head is a verb:
    a singular noun (NN) with a non-VBZ verb gets the verb rewritten
    as lemma + 's'; a plural noun (NNS) with a VBZ verb gets the bare
    lemma.  All other tokens are emitted unchanged.

    Bug fix: the original appended the corrected verb *in place of the
    subject token*, dropping the subject word from the output and
    leaving the uncorrected verb duplicated at its own position.
    Corrections are now applied at the verb's index.
    """
    doc = nlp(text)
    verb_corrections = {}
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                verb_corrections[token.head.i] = token.head.lemma_ + "s"
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                verb_corrections[token.head.i] = token.head.lemma_
    return ' '.join(verb_corrections.get(tok.i, tok.text) for tok in doc)

# Ensure proper apostrophe usage in common contractions
_CONTRACTION_FIXES = {
    "dont": "don't", "doesnt": "doesn't", "didnt": "didn't",
    "cant": "can't", "couldnt": "couldn't", "wont": "won't",
    "wouldnt": "wouldn't", "shouldnt": "shouldn't", "isnt": "isn't",
    "arent": "aren't", "wasnt": "wasn't", "werent": "weren't",
    "havent": "haven't", "hasnt": "hasn't", "hadnt": "hadn't",
    "im": "I'm", "ive": "I've", "youre": "you're", "theyre": "they're",
}

def correct_apostrophes(text):
    """Insert missing apostrophes in well-known contractions.

    Bug fix: the original blanket regex rewrote *every* word ending in
    's' as a possessive ("cats" -> "cat's", "is" -> "i's"), corrupting
    ordinary plurals, and its second rule was an identity replacement.
    This version only touches words in an explicit contraction
    whitelist, preserving the capitalization of the first letter.
    """
    def _fix(match):
        word = match.group(0)
        fixed = _CONTRACTION_FIXES.get(word.lower())
        if fixed is None:
            return word
        if word[0].isupper():
            return fixed[0].upper() + fixed[1:]
        return fixed

    return re.sub(r"\b\w+\b", _fix, text)

# Enhanced punctuation spacing normalization
def enhance_punctuation(text):
    """Normalize spacing around punctuation.

    Removes whitespace *before* punctuation and inserts a single space
    *after* punctuation when a letter follows.  Restricting the second
    rule to letters fixes the original's corruption of decimals
    ("3.14" -> "3. 14") and thousands separators ("1,000" -> "1, 000"),
    which it split by inserting a space before any non-space character.
    """
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)             # "word ," -> "word,"
    text = re.sub(r'([?.!,";:])(?=[A-Za-z])', r'\1 ', text)   # "a,b" -> "a, b"
    return text

# Paraphrasing using synonyms and correcting semantic errors
def rephrase_with_synonyms(text):
    """Replace content words with their first WordNet synonym.

    Nouns, verbs, adjectives and adverbs are looked up via
    get_synonyms_nltk() using the token lemma; chosen verb synonyms get
    a crude suffix-based re-inflection ('ing', 'ed', 's') driven by the
    original fine-grained tag.  All other tokens are kept verbatim.
    """
    def _inflect(base, tag):
        # Re-apply a rough verb inflection to the chosen synonym.
        if tag == "VBG":
            return base + 'ing'
        if tag in ("VBD", "VBN"):
            return base + 'ed'
        if tag == "VBZ":
            return base + 's'
        return base

    result = []
    for tok in nlp(text):
        replacement = tok.text
        if tok.pos_ in ("NOUN", "VERB", "ADJ", "ADV"):
            # getattr maps spaCy's coarse POS name to the WordNet POS tag.
            candidates = get_synonyms_nltk(tok.lemma_, getattr(wordnet, tok.pos_))
            if candidates:
                replacement = candidates[0]
                if tok.pos_ == "VERB":
                    replacement = _inflect(replacement, tok.tag_)
        result.append(replacement)
    return ' '.join(result)

# Comprehensive text correction
def paraphrase_and_correct(text):
    """Run the full correction pipeline over *text*.

    Each stage consumes the previous stage's output, in a fixed order:
    spell check, filler removal, capitalization, tense, number,
    articles, punctuation, apostrophes, synonym rephrasing, double
    negatives, and finally subject-verb agreement.
    """
    pipeline = (
        enhanced_spell_check,
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        correct_apostrophes,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    for step in pipeline:
        text = step(text)
    return text

# Integrate with Gradio UI
def gradio_interface(text):
    """Gradio callback: run the full correction pipeline on *text*."""
    return paraphrase_and_correct(text)

# Build the Gradio web UI: a single free-text input box feeding the
# correction pipeline, with the corrected text shown in one output box.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Corrected Text")],
    title="Grammar & Semantic Error Correction",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()