import os
import gradio as gr
from transformers import pipeline
import spacy
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker
import re
import inflect

# Load the spaCy English model, downloading it on first use
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Initialize the English text-classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Initialize the inflect engine for pluralization
inflect_engine = inflect.engine()

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)


def predict_en(text):
    """Classify text as human- or AI-written; returns (label, score)."""
    res = pipeline_en(text)[0]
    return res['label'], res['score']


def get_synonyms_nltk(word, pos):
    """Return WordNet synonyms of `word` for the given part of speech."""
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        return [lemma.name() for lemma in lemmas if lemma.name() != word]
    return []


def remove_redundant_words(text):
    """Drop common filler words that add no meaning."""
    doc = nlp(text)
    meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
    filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
    return ' '.join(filtered_text)


def capitalize_sentences_and_nouns(text):
    """Capitalize sentence-initial words and proper nouns."""
    doc = nlp(text)
    corrected_text = []
    for sent in doc.sents:
        sentence = []
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                sentence.append(token.text.capitalize())
            else:
                sentence.append(token.text)
        corrected_text.append(' '.join(sentence))
    return ' '.join(corrected_text)


def correct_tense_errors(text):
    """Reduce auxiliary verbs to their base form via WordNet's morphy."""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # spaCy v3 tags auxiliaries as AUX rather than VERB, so accept both;
        # the original checked VERB only, which rarely matches aux dependents
        if token.pos_ in {"VERB", "AUX"} and token.dep_ in {"aux", "auxpass"}:
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)


def correct_singular_plural_errors(text):
    """Fix noun number when it clashes with a quantifier or article."""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.pos_ == "NOUN":
            # Quantifiers and articles attach to the noun itself, so inspect
            # token.children (the original checked token.head.children)
            if token.tag_ == "NN" and any(child.text.lower() in {'many', 'several', 'few'} for child in token.children):
                corrected_text.append(inflect_engine.plural(token.lemma_))
            elif token.tag_ == "NNS" and any(child.text.lower() in {'a', 'one'} for child in token.children):
                # singular_noun() returns False if the word is already singular
                corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)


def correct_article_errors(text):
    """Choose 'a' vs. 'an' from the next word's first letter (naive vowel test)."""
    doc = nlp(text)
    corrected_text = []
    for i, token in enumerate(doc):
        if token.text.lower() in {'a', 'an'}:
            next_token = doc[i + 1] if i + 1 < len(doc) else None
            if next_token and next_token.text[0].lower() in "aeiou":
                corrected_text.append("an")
            else:
                corrected_text.append("a")
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)


def correct_double_negatives(text):
    """Drop a negation when its head verb carries a second negation dependent."""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # Exclude the token itself when scanning its siblings; the original
        # check matched the token against itself and stripped every negation
        if token.dep_ == "neg" and any(
                child is not token and child.dep_ == "neg"
                for child in token.head.children):
            continue
        corrected_text.append(token.text)
    return ' '.join(corrected_text)


def ensure_subject_verb_agreement(text):
    """Inflect each verb to agree with its singular or plural subject."""
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        # Correct the verb at its own position and leave the subject intact;
        # the original appended the fixed verb in place of the subject word,
        # which dropped the subject and emitted the verb twice
        if token.pos_ == "VERB":
            subject = next((child for child in token.children if child.dep_ == "nsubj"), None)
            if subject is not None and subject.tag_ == "NN" and token.tag_ != "VBZ":
                corrected_text.append(token.lemma_ + "s")
                continue
            if subject is not None and subject.tag_ == "NNS" and token.tag_ == "VBZ":
                corrected_text.append(token.lemma_)
                continue
        corrected_text.append(token.text)
    return ' '.join(corrected_text)
def enhanced_spell_check(text):
    """Spell-check each word, handling underscore-joined compounds piecewise."""
    words = text.split()
    corrected_words = []
    for word in words:
        if '_' in word:
            sub_words = word.split('_')
            # correction() can return None, so fall back to the original word
            corrected_sub_words = [spell.correction(w) or w for w in sub_words]
            corrected_words.append('_'.join(corrected_sub_words))
        else:
            corrected_words.append(spell.correction(word) or word)
    return ' '.join(corrected_words)


def correct_semantic_errors(text):
    """Map stilted WordNet-style substitutions back to natural wording."""
    semantic_corrections = {
        "animate_being": "animal", "little": "smallest", "big": "largest",
        "mammalian": "mammals", "universe": "world", "manner": "ways",
        "continue": "preserve", "dirt": "soil", "wellness": "health",
        "modulate": "regulate", "clime": "climate", "function": "role",
        "keeping": "maintaining", "lend": "contribute", "better": "improve",
        "cardinal": "key", "expeditiously": "efficiently", "marauder": "predator",
        "quarry": "prey", "forestalling": "preventing", "bend": "turn",
        "works": "plant", "croping": "grazing", "flora": "vegetation",
        "dynamical": "dynamic", "alteration": "change", "add-on": "addition",
        "indispensable": "essential", "nutrient": "food", "harvest": "crops",
        "pollenateing": "pollinating", "divers": "diverse", "beginning": "source",
        "homo": "humans", "fall_in": "collapse", "takeing": "leading",
        "coinage": "species", "trust": "rely", "angleworm": "earthworm",
        "interrupt": "break", "affair": "matter", "air_out": "aerate",
        "alimentary": "nutrient", "distributeed": "spread", "country": "areas",
        "reconstruct": "restore", "debauched": "degraded", "giant": "whales",
        "organic_structure": "bodies", "decease": "die", "carcase": "carcasses",
        "pin_downing": "trapping", "cut_downs": "reduces", "ambiance": "atmosphere",
        "extenuateing": "mitigating", "decision": "conclusion", "doing": "making",
        "prolongs": "sustains", "home_ground": "habitats", "continueing": "preserving",
        "populateing": "living", "beingness": "beings"
    }
    words = text.split()
    corrected_words = [semantic_corrections.get(word.lower(), word) for word in words]
    return ' '.join(corrected_words)


def enhance_punctuation(text):
    """Normalize spacing around punctuation and capitalize sentence starts."""
    text = re.sub(r'\s+([?.!,";:])', r'\1', text)       # no space before punctuation
    text = re.sub(r'([?.!,";:])(\S)', r'\1 \2', text)   # one space after punctuation
    text = re.sub(r'\s*"\s*', '" ', text).strip()       # normalize quote spacing
    text = re.sub(r'([.!?])\s*([a-z])',
                  lambda m: m.group(1) + ' ' + m.group(2).upper(), text)
    # Aggressive heuristic kept from the original: it assumes any capitalized
    # word following a lowercase one starts a new sentence, so it will also
    # insert periods before mid-sentence proper nouns
    text = re.sub(r'([a-z])\s+([A-Z])', r'\1. \2', text)
    return text


def correct_apostrophes(text):
    # NOTE: the source is truncated mid-pattern here; the negative lookbehind
    # below is an assumed completion that adds a possessive apostrophe to a
    # trailing "s" unless one is already present
    text = re.sub(r"\b(\w+)s\b(?<!'s)", r"\1's", text)
    return text
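
# ---------------------------------------------------------------------------
# Minimal usage sketch. The original file is truncated before any UI wiring,
# so everything below is an assumption rather than the author's code: the
# `humanize` helper, the order of the correction passes, and the Gradio
# Interface layout are illustrative only.

def humanize(text):
    """Apply the correction passes above in one plausible order."""
    for step in (enhanced_spell_check, correct_semantic_errors,
                 remove_redundant_words, correct_article_errors,
                 correct_double_negatives, correct_singular_plural_errors,
                 correct_tense_errors, ensure_subject_verb_agreement,
                 capitalize_sentences_and_nouns, enhance_punctuation,
                 correct_apostrophes):
        text = step(text)
    return text


if __name__ == "__main__":
    demo = gr.Interface(
        fn=lambda text: (humanize(text), *predict_en(text)),
        inputs=gr.Textbox(lines=5, label="Input text"),
        outputs=[gr.Textbox(label="Corrected text"),
                 gr.Textbox(label="AI-detector label"),
                 gr.Number(label="Detector score")],
        title="Grammar correction + AI-text detection (illustrative demo)",
    )
    demo.launch()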