Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

File size: 9,944 Bytes

import os
import gradio as gr
from transformers import pipeline
import spacy
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker
import re
import inflect

# Module-level NLP resources shared by every helper below.
#
# Load the small English spaCy model. If loading raises OSError (treated here
# as "model not installed"), download it once and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Text-classification pipeline used by predict_en() to label text with the
# "Hello-SimpleAI/chatgpt-detector-roberta" model.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Spell checker used by enhanced_spell_check().
spell = SpellChecker()

# inflect engine used for noun singular/plural conversion.
inflect_engine = inflect.engine()

# Fetch the WordNet corpora needed by nltk.corpus.wordnet; quiet=True
# suppresses the download progress output.
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

def predict_en(text):
    """Classify ``text`` with the module-level AI-detection pipeline.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first pipeline result.
    """
    # The text comes straight from a UI textbox and can be arbitrarily long.
    # Without truncation, inputs above the model's maximum sequence length
    # make the classifier raise instead of returning a prediction, so the
    # input is cut to the 512-token limit of the RoBERTa detector.
    res = pipeline_en(text, truncation=True, max_length=512)[0]
    return res['label'], res['score']

def get_synonyms_nltk(word, pos):
    """Return lemma names from the first WordNet synset of ``word``.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant passed to ``wordnet.synsets``.

    Returns:
        The lemma names of the first matching synset, excluding any name
        equal to ``word``; an empty list when no synset is found.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []

    names = (lemma.name() for lemma in matches[0].lemmas())
    return [name for name in names if name != word]

def remove_redundant_words(text):
    """Drop a fixed set of filler words from ``text``.

    The text is tokenized with spaCy; tokens whose lower-cased text is one of
    the filler words are skipped and the remaining token texts are joined
    with single spaces.
    """
    fillers = {"actually", "basically", "literally", "really", "very", "just"}

    kept = []
    for token in nlp(text):
        if token.text.lower() in fillers:
            continue
        kept.append(token.text)
    return ' '.join(kept)

def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of sentence starts and proper nouns.

    Args:
        text: The text to process.

    Returns:
        The spaCy token texts joined with single spaces, where the first
        token of every sentence and every token tagged ``PROPN`` starts with
        an upper-case letter.
    """
    doc = nlp(text)
    sentences = []

    for sent in doc.sents:
        words = []
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Only touch the first character: str.capitalize() would also
                # lower-case the remainder and turn "NASA" into "Nasa".
                word = word[:1].upper() + word[1:]
            words.append(word)
        sentences.append(' '.join(words))

    return ' '.join(sentences)

def correct_tense_errors(text):
    """Replace auxiliary verbs with their WordNet base form.

    A token is rewritten only when spaCy tags it ``VERB`` and its dependency
    label is ``aux`` or ``auxpass``; ``wordnet.morphy`` supplies the base
    form and the original text is kept when it returns nothing. All token
    texts are joined with single spaces.
    """
    auxiliary_deps = {"aux", "auxpass"}

    def _base_form(token):
        if token.pos_ != "VERB" or token.dep_ not in auxiliary_deps:
            return token.text
        return wordnet.morphy(token.text, wordnet.VERB) or token.text

    return ' '.join(_base_form(token) for token in nlp(text))

def correct_singular_plural_errors(text):
    """Adjust noun number based on quantity words attached to the noun's head.

    * A singular noun (``NN``) is pluralized when one of the children of its
      head is "many", "several" or "few".
    * A plural noun (``NNS``) is singularized when one of the children of its
      head is "a" or "one"; the original text is kept when inflect cannot
      produce a singular form.

    Every other token is passed through unchanged. Token texts are joined
    with single spaces.
    """
    plural_cues = ['many', 'several', 'few']
    singular_cues = ['a', 'one']

    def _head_has_child_in(token, cues):
        return any(child.text.lower() in cues for child in token.head.children)

    pieces = []
    for token in nlp(text):
        word = token.text
        if token.pos_ == "NOUN":
            if token.tag_ == "NN" and _head_has_child_in(token, plural_cues):
                word = inflect_engine.plural(token.lemma_)
            elif token.tag_ == "NNS" and _head_has_child_in(token, singular_cues):
                word = inflect_engine.singular_noun(token.text) or token.text
        pieces.append(word)

    return ' '.join(pieces)

def correct_article_errors(text):
    """Choose between "a" and "an" according to the following word.

    Args:
        text: The text to process.

    Returns:
        The spaCy token texts joined with single spaces, with every "a"/"an"
        token replaced by the article that fits the next token. The
        capitalisation of the original article is preserved, and an article
        with no following token becomes "a".
    """

    def _article_for(word):
        # inflect picks the article by sound ("an hour", "a university"),
        # which a plain first-letter vowel test gets wrong.
        phrase = inflect_engine.a(word)
        parts = phrase.split(None, 1) if phrase else []
        if parts and parts[0].lower() in ("a", "an"):
            return parts[0].lower()
        # Fallback: the simple spelling-based rule.
        return "an" if word[0].lower() in "aeiou" else "a"

    doc = nlp(text)
    corrected_text = []
    for i, token in enumerate(doc):
        if token.text.lower() not in ('a', 'an'):
            corrected_text.append(token.text)
            continue

        next_token = doc[i + 1] if i + 1 < len(doc) else None
        article = _article_for(next_token.text) if next_token is not None else "a"
        # Keep a sentence-initial "A"/"An" capitalised instead of always
        # emitting the lower-case form.
        if token.text[0].isupper():
            article = article.capitalize()
        corrected_text.append(article)
    return ' '.join(corrected_text)

def correct_double_negatives(text):
    """Remove repeated negation modifiers attached to the same head.

    Args:
        text: The text to process.

    Returns:
        The spaCy token texts joined with single spaces. When a head has
        more than one child with the ``neg`` dependency, the first is kept
        and the later ones are dropped. A single negation is left untouched.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.dep_ == "neg":
            # A "neg" token is itself one of its head's children, so merely
            # checking that the head has a "neg" child is always true and
            # would delete every negation. Only drop this token when an
            # earlier negation on the same head has already been kept.
            negations = [child for child in token.head.children if child.dep_ == "neg"]
            if len(negations) > 1 and token.i != negations[0].i:
                continue
        corrected_text.append(token.text)
    return ' '.join(corrected_text)

def _third_person_singular(lemma):
    """Return the third-person-singular present form of a verb lemma."""
    irregular = {"be": "is", "have": "has"}
    lowered = lemma.lower()
    if lowered in irregular:
        return irregular[lowered]
    if lowered.endswith(("s", "x", "z", "ch", "sh", "o")):
        return lemma + "es"
    if len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
        return lemma[:-1] + "ies"
    return lemma + "s"


def ensure_subject_verb_agreement(text):
    """Make present-tense verbs agree in number with their noun subject.

    Args:
        text: The text to process.

    Returns:
        The spaCy token texts joined with single spaces, where

        * a verb tagged ``VBP`` whose ``nsubj`` is a singular noun (``NN``)
          is replaced by its third-person-singular form, and
        * a verb tagged ``VBZ`` whose ``nsubj`` is a plural noun (``NNS``)
          is replaced by its lemma.

        All other tokens, including the subject itself, are kept as they are.
    """
    doc = nlp(text)
    words = [token.text for token in doc]

    for token in doc:
        verb = token.head
        if token.dep_ != "nsubj" or verb.pos_ != "VERB":
            continue
        # The correction is written at the verb's own position. Writing it
        # in place of the subject would drop the subject and leave the
        # uncorrected verb in the output as well.
        # Only present-tense forms are rewritten, so past-tense verbs and
        # base forms that follow an auxiliary are not given an "-s".
        if token.tag_ == "NN" and verb.tag_ == "VBP":
            words[verb.i] = _third_person_singular(verb.lemma_)
        elif token.tag_ == "NNS" and verb.tag_ == "VBZ":
            words[verb.i] = verb.lemma_

    return ' '.join(words)

def enhanced_spell_check(text):
    """Spell-check each whitespace-separated word of ``text``.

    Args:
        text: The text to process.

    Returns:
        The words joined with single spaces. Leading and trailing
        punctuation of each word is preserved, underscore-joined compounds
        are corrected part by part, and a word is left unchanged when the
        spell checker offers no correction.
    """

    def _fix(part):
        # Only hand real words (letters, optionally with apostrophes) to the
        # spell checker; numbers and mixed tokens are passed through.
        if not part or not part.replace("'", "").isalpha():
            return part
        return spell.correction(part) or part

    corrected_words = []
    for chunk in text.split():
        # Split off surrounding punctuation so that only the word itself is
        # spell-checked and the punctuation is put back untouched.
        lead, core, trail = re.match(r'^(\W*)(.*?)(\W*)$', chunk, re.DOTALL).groups()
        core = '_'.join(_fix(part) for part in core.split('_'))
        corrected_words.append(lead + core + trail)
    return ' '.join(corrected_words)

# Fixed word-for-word replacement table used by correct_semantic_errors().
# Built once at import time instead of on every call.
_SEMANTIC_CORRECTIONS = {
    "animate_being": "animal",
    "little": "smallest",
    "big": "largest",
    "mammalian": "mammals",
    "universe": "world",
    "manner": "ways",
    "continue": "preserve",
    "dirt": "soil",
    "wellness": "health",
    "modulate": "regulate",
    "clime": "climate",
    "function": "role",
    "keeping": "maintaining",
    "lend": "contribute",
    "better": "improve",
    "cardinal": "key",
    "expeditiously": "efficiently",
    "marauder": "predator",
    "quarry": "prey",
    "forestalling": "preventing",
    "bend": "turn",
    "works": "plant",
    "croping": "grazing",
    "flora": "vegetation",
    "dynamical": "dynamic",
    "alteration": "change",
    "add-on": "addition",
    "indispensable": "essential",
    "nutrient": "food",
    "harvest": "crops",
    "pollenateing": "pollinating",
    "divers": "diverse",
    "beginning": "source",
    "homo": "humans",
    "fall_in": "collapse",
    "takeing": "leading",
    "coinage": "species",
    "trust": "rely",
    "angleworm": "earthworm",
    "interrupt": "break",
    "affair": "matter",
    "air_out": "aerate",
    "alimentary": "nutrient",
    "distributeed": "spread",
    "country": "areas",
    "reconstruct": "restore",
    "debauched": "degraded",
    "giant": "whales",
    "organic_structure": "bodies",
    "decease": "die",
    "carcase": "carcasses",
    "pin_downing": "trapping",
    "cut_downs": "reduces",
    "ambiance": "atmosphere",
    "extenuateing": "mitigating",
    "decision": "conclusion",
    "doing": "making",
    "prolongs": "sustains",
    "home_ground": "habitats",
    "continueing": "preserving",
    "populateing": "living",
    "beingness": "beings",
}

# Splits a whitespace-free chunk into (leading punctuation, word, trailing
# punctuation). Hyphens and underscores inside the word stay with the word.
_SEMANTIC_TOKEN = re.compile(r'^(\W*)(.*?)(\W*)$', re.DOTALL)


def correct_semantic_errors(text):
    """Replace words found in the fixed correction table.

    Args:
        text: The text to process.

    Returns:
        The whitespace-separated words joined with single spaces. A word is
        replaced when its lower-cased form, ignoring surrounding
        punctuation, is a key of the table. Surrounding punctuation is kept
        and the capitalisation of the original word (leading capital or all
        caps) is carried over to the replacement.
    """
    corrected_words = []
    for word in text.split():
        lead, core, trail = _SEMANTIC_TOKEN.match(word).groups()
        replacement = _SEMANTIC_CORRECTIONS.get(core.lower())
        if replacement is None:
            corrected_words.append(word)
            continue

        if len(core) > 1 and core.isupper():
            replacement = replacement.upper()
        elif core[:1].isupper():
            replacement = replacement[:1].upper() + replacement[1:]
        corrected_words.append(lead + replacement + trail)
    return ' '.join(corrected_words)

def enhance_punctuation(text):
    """Normalise spacing around punctuation and capitalise sentence starts.

    Steps, in order:

    1. Remove whitespace in front of ``? . ! , ; :``.
    2. Insert a space after one of those marks when a letter follows
       directly. Digits are left alone, so "3.14" stays intact.
    3. Trim whitespace just inside a pair of double quotes.
    4. Upper-case a lower-case letter that follows ``. ! ?`` and whitespace.

    Abbreviations written with inner periods (such as "e.g.") are not
    recognised and are treated like sentence ends.

    Args:
        text: The text to process.

    Returns:
        The adjusted text with leading and trailing whitespace stripped.
    """
    text = re.sub(r'\s+([?.!,;:])', r'\1', text)
    text = re.sub(r'([?.!,;:])(?=[^\W\d_])', r'\1 ', text)
    # Quotes are handled as pairs; treating every quote the same way would
    # pull opening quotes away from the quoted text.
    text = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', text)
    text = re.sub(
        r'([.!?])\s+([a-z])',
        lambda m: m.group(1) + ' ' + m.group(2).upper(),
        text,
    )
    # No period is inserted in front of capitalised words: a capital letter
    # after a lower-case word is usually a proper noun or "I", not a new
    # sentence ("the Earth" must not become "the. Earth").
    return text.strip()

def correct_apostrophes(text):
    """Re-attach possessive and contraction endings to the preceding word.

    Joining tokens with spaces leaves endings such as ``'s``, ``n't`` or
    ``'re`` detached ("dog 's", "do n't"); this glues them back on. Words
    that merely end in "s" are left alone, because a plural or verb form
    cannot be told apart from a possessive by spelling.

    Args:
        text: The text to process.

    Returns:
        The text with detached apostrophe endings joined to their word.
    """
    return re.sub(
        r"(\w)\s+(n['\u2019]t|['\u2019](?:s|re|ve|ll|d|m))\b(?!['\u2019])",
        r"\1\2",
        text,
        flags=re.IGNORECASE,
    )

def handle_possessives(text):
    """Run the possessive substitution over ``text`` and return the result.

    Each ``word's`` match is rewritten as the captured word followed by
    ``'s``, i.e. with the same characters it already had.
    """
    possessive = re.compile(r"\b(\w+)'s\b")
    return possessive.sub(r"\1's", text)

def rephrase_with_synonyms(text):
    """Swap nouns, verbs, adjectives and adverbs for their first synonym.

    For each spaCy token:

    * "earth" in any casing becomes "Earth";
    * tokens tagged NOUN, VERB, ADJ or ADV are replaced by the first result
      of ``get_synonyms_nltk`` for their lemma, when there is one;
    * for verbs, a suffix is appended to the synonym according to the tag
      (``VBG`` -> "ing", ``VBD``/``VBN`` -> "ed", ``VBZ`` -> "s");
    * everything else is kept as is.

    The resulting words are joined with single spaces.
    """
    replaceable_pos = ("NOUN", "VERB", "ADJ", "ADV")
    verb_suffixes = {"VBG": "ing", "VBD": "ed", "VBN": "ed", "VBZ": "s"}

    def _rephrase(token):
        if token.text.lower() == "earth":
            return "Earth"
        if token.pos_ not in replaceable_pos:
            return token.text

        # The wordnet module exposes its POS constants under the same names
        # as spaCy's coarse tags listed above.
        candidates = get_synonyms_nltk(token.lemma_, getattr(wordnet, token.pos_))
        if not candidates:
            return token.text

        choice = candidates[0]
        if token.pos_ == "VERB":
            choice = choice + verb_suffixes.get(token.tag_, '')
        return choice

    return ' '.join(_rephrase(token) for token in nlp(text))

def paraphrase_and_correct(text):
    """Run ``text`` through the full correction and rephrasing pipeline.

    The stages are applied in the order listed below, each receiving the
    previous stage's output. Finally, the words "i" and "earth" (in any
    casing) are capitalised and the words are joined with single spaces.
    """
    stages = (
        enhanced_spell_check,
        correct_semantic_errors,
        remove_redundant_words,
        capitalize_sentences_and_nouns,
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        enhance_punctuation,
        correct_apostrophes,
        handle_possessives,
        rephrase_with_synonyms,
        correct_double_negatives,
        ensure_subject_verb_agreement,
    )
    for stage in stages:
        text = stage(text)

    always_capitalized = ['i', 'earth']
    words = []
    for word in text.split():
        if word.lower() in always_capitalized:
            word = word.capitalize()
        words.append(word)
    return ' '.join(words)

def detect_ai(text):
    """Return the ``(label, score)`` pair that ``predict_en`` gives for ``text``."""
    return predict_en(text)

def gradio_interface(text):
    """Callback for the Gradio UI.

    Returns:
        A two-item tuple: a ``{label: score}`` dict from ``detect_ai`` and
        the output of ``paraphrase_and_correct`` for the same text.
    """
    label, score = detect_ai(text)
    return {label: score}, paraphrase_and_correct(text)

# Gradio UI definition: a single text box feeds gradio_interface(), whose two
# return values fill the Label output (the {label: score} dict) and the
# "Corrected Text" box, in that order.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=[
        gr.Label(num_top_classes=1),
        gr.Textbox(label="Corrected Text")
    ],
    title="AI Detection and Grammar Correction",
    description="Detect AI-generated content and correct grammar issues."
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()