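"""Gradio app for grammar and semantic error correction.

Pipeline (see paraphrase_and_correct): spell check -> filler-word removal ->
capitalization -> tense and subject-verb agreement fixes -> punctuation and
apostrophe cleanup -> semantic word corrections -> synonym-based rephrasing.
"""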
import gradio as gr
import spacy
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker
import re
import inflect

# Initialize components: load the spaCy model, downloading it on first run
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Initialize the spell checker
spell = SpellChecker()

# Initialize the inflect engine for pluralization
inflect_engine = inflect.engine()

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Function to remove redundant/filler words
def remove_redundant_words(text):
    # Strip multi-word fillers first; the token-level filter below only sees single tokens
    for phrase in ("kind of", "sort of", "you know"):
        text = re.sub(r'\b' + phrase + r'\b', '', text, flags=re.IGNORECASE)
    doc = nlp(text)
    meaningless_words = {"actually", "basically", "literally", "really", "very", "just",
                         "quite", "rather", "simply", "that", "honestly", "seriously"}
    filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
    return ' '.join(filtered_text)

# Function to capitalize sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                sentence.append(token.text.capitalize())
            else:
                sentence.append(token.text.lower())
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)

# Function to correct verb tenses by normalizing auxiliaries to their base form
def correct_tense_errors(text):
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # spaCy tags auxiliaries as AUX rather than VERB, so check both POS values
        if token.pos_ in {"VERB", "AUX"} and token.dep_ in {"aux", "auxpass"}:
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)

# Function to ensure subject-verb agreement
def ensure_subject_verb_agreement(text):
    doc = nlp(text)
    # Collect corrections keyed by the verb's token index, so each token is
    # emitted exactly once (replacing the verb in place rather than dropping
    # the subject and duplicating the verb)
    corrections = {}
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                corrections[token.head.i] = token.head.lemma_ + "s"  # singular subject -> 3rd-person singular verb
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                corrections[token.head.i] = token.head.lemma_        # plural subject -> base verb form
    return ' '.join(corrections.get(token.i, token.text) for token in doc)

# Function to correct apostrophe usage. Deciding plural vs. possessive needs
# context, so only repair spacing around apostrophes rather than inserting them
def correct_apostrophes(text):
    text = re.sub(r"\b(\w+)\s+'\s*s\b", r"\1's", text)  # Rejoin split possessives: "dog ' s" -> "dog's"
    text = re.sub(r"\b(\w+s)\s+'(\s)", r"\1'\2", text)  # Rejoin split plural possessives: "dogs ' " -> "dogs' "
    return text

# Function to enhance punctuation usage
def enhance_punctuation(text):
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)      # Remove extra space before punctuation
    text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)  # Add space after punctuation if needed
    text = re.sub(r'\s{2,}', ' ', text).strip()        # Collapse repeated whitespace
    # Capitalize the first word after sentence-ending punctuation
    text = re.sub(r'([.!?])\s*([a-z])', lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
    return text

# Function to correct semantic errors and replace with more appropriate words
def correct_semantic_errors(text):
    semantic_corrections = {
        "animate_being": "animal",
        "little": "smallest",
        "big": "largest",
        "mammalian": "mammals",
        "universe": "world",
        "manner": "ways",
        "continue": "preserve",
        "dirt": "soil",
        "wellness": "health",
        "modulate": "regulate",
        "clime": "climate",
        "function": "role",
        "keeping": "maintaining",
        "lend": "contribute",
        "better": "improve",
        "cardinal": "key",
        "expeditiously": "efficiently",
        "marauder": "predator",
        "quarry": "prey",
        "forestalling": "preventing",
        "bend": "turn",
        "works": "plant",
        "croping": "grazing",
        "flora": "vegetation",
        "dynamical": "dynamic",
        "alteration": "change",
        "add-on": "addition",
        "indispensable": "essential",
        "nutrient": "food",
        "harvest": "crops",
        "pollenateing": "pollinating",
        "divers": "diverse",
        "beginning": "source",
        "homo": "humans",
        "fall_in": "collapse",
        "takeing": "leading",
        "coinage": "species",
        "trust": "rely",
        "angleworm": "earthworm",
        "interrupt": "break",
        "affair": "matter",
        "air_out": "aerate",
        "alimentary": "nutrient",
        "distributeed": "spread",
        "country": "areas",
        "reconstruct": "restore",
        "debauched": "degraded",
        "giant": "whales",
        "organic_structure": "bodies",
        "decease": "die",
        "carcase": "carcasses",
        "pin_downing": "trapping",
        "cut_downs": "reduces",
        "ambiance": "atmosphere",
        "extenuateing": "mitigating",
        "decision": "conclusion",
        "doing": "making",
        "prolongs": "sustains",
        "home_ground": "habitats",
        "continueing": "preserving",
        "populateing": "living",
        "beingness": "beings"
    }
    words = text.split()
    corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
    return ' '.join(corrected_words)

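# Helper used by rephrase_with_synonyms below. A minimal WordNet-based sketch
# (assumed implementation; the name is referenced but not defined in the file):
# return the lemma names of the first synset for the given word and POS,
# excluding the word itself.
def get_synonyms_nltk(word, pos):
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name() for lemma in synsets[0].lemmas() if lemma.name().lower() != word.lower()]
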
# Function to rephrase using synonyms and adjust verb forms
def rephrase_with_synonyms(text):
    doc = nlp(text)
    rephrased_text = []
    for token in doc:
        pos_tag = None
        if token.pos_ in ["NOUN", "VERB", "ADJ", "ADV"]:
            pos_tag = getattr(wordnet, token.pos_)  # maps to wordnet.NOUN / VERB / ADJ / ADV
        if pos_tag:
            synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
            if synonyms:
                synonym = synonyms[0]
                # Naive re-inflection: append a suffix to match the original verb form
                if token.pos_ == "VERB":
                    if token.tag_ == "VBG":
                        synonym = synonym + 'ing'
                    elif token.tag_ in ["VBD", "VBN"]:
                        synonym = synonym + 'ed'
                    elif token.tag_ == "VBZ":
                        synonym = synonym + 's'
                rephrased_text.append(synonym)
            else:
                rephrased_text.append(token.text)
        else:
            rephrased_text.append(token.text)
    return ' '.join(rephrased_text)

# Function to apply enhanced spell check
def enhanced_spell_check(text):
    words = text.split()
    corrected_words = []
    for word in words:
        if '_' in word:
            # Correct each part of underscore-joined compounds separately
            sub_words = word.split('_')
            corrected_sub_words = [spell.correction(w) or w for w in sub_words]
            corrected_words.append('_'.join(corrected_sub_words))
        else:
            corrected_word = spell.correction(word) or word
            corrected_words.append(corrected_word)
    return ' '.join(corrected_words)

# Comprehensive function to correct the entire text
def paraphrase_and_correct(text):
    text = enhanced_spell_check(text)
    text = remove_redundant_words(text)
    text = capitalize_sentences_and_nouns(text)
    text = correct_tense_errors(text)
    text = ensure_subject_verb_agreement(text)
    text = enhance_punctuation(text)
    text = correct_apostrophes(text)
    text = correct_semantic_errors(text)
    text = rephrase_with_synonyms(text)
    return text

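# Illustrative call (input only; actual output depends on the loaded models):
#   paraphrase_and_correct("the worlds little mammalian continue to better dirt wellness")
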
# Gradio interface function
def gradio_interface(text):
    corrected_text = paraphrase_and_correct(text)
    return corrected_text

# Setting up the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Corrected Text")],
    title="Grammar & Semantic Error Correction",
)

# Run the Gradio interface
if __name__ == "__main__":
    iface.launch()