"""AI-content detection and rule-based grammar clean-up behind a Gradio UI.

The module loads a RoBERTa text classifier, a spaCy English pipeline, NLTK
WordNet data and a spell checker at import time, exposes a set of small
text-rewriting helpers, and wires them into a Gradio interface.

The rewriting helpers are heuristics: each one re-parses the text with spaCy,
rewrites individual tokens and re-assembles the text while keeping the
whitespace that followed every token in the input, so punctuation,
contractions and possessives are not detached from their words.
"""

import os
import re
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import wordnet
from spellchecker import SpellChecker
from transformers import pipeline

# Initialize the English text classification pipeline for AI detection
pipeline_en = pipeline(
    task="text-classification",
    model="Hello-SimpleAI/chatgpt-detector-roberta",
)

# Initialize the spell checker
spell = SpellChecker()

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the SpaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter so the model lands in the active
    # environment rather than in whatever "python" resolves to on PATH.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Filler words dropped by remove_redundant_words().
_FILLER_WORDS = frozenset(
    {"actually", "basically", "literally", "really", "very", "just"}
)
# Modifiers that signal a plural / singular noun respectively.
_PLURAL_QUANTIFIERS = frozenset({"many", "several", "few"})
_SINGULAR_DETERMINERS = frozenset({"a", "one"})

# Start of text, or sentence-ending punctuation plus whitespace, then a letter.
_SENTENCE_START_RE = re.compile(r"(^\s*|[.!?]\s+)(\w)")
# A single word with optional punctuation glued to either side.
_WORD_CHUNK_RE = re.compile(r"(\W*)(\w+)(\W*)")


def _upper_first(word):
    """Upper-case the first character of *word* and leave the rest untouched."""
    return word[:1].upper() + word[1:]


def _match_case(original, replacement):
    """Give *replacement* the capitalisation pattern of *original*."""
    if len(original) > 1 and original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return _upper_first(replacement)
    return replacement


def _join_tokens(pieces):
    """Concatenate ``(text, trailing_whitespace)`` pairs into one string."""
    return "".join(text + space for text, space in pieces)


def _add_s(word):
    """Append a plural / third-person ``-s`` using basic spelling rules."""
    lower = word.lower()
    if lower.endswith(("s", "x", "z", "ch", "sh")):
        return word + "es"
    if len(lower) > 1 and lower.endswith("y") and lower[-2] not in "aeiou":
        return word[:-1] + "ies"
    return word + "s"


def _add_ing(verb):
    """Append ``-ing`` to a base verb (no consonant doubling is attempted)."""
    if verb.endswith("ie"):
        return verb[:-2] + "ying"
    if (
        len(verb) > 2
        and verb.endswith("e")
        and not verb.endswith(("ee", "oe", "ye"))
    ):
        return verb[:-1] + "ing"
    return verb + "ing"


def _add_ed(verb):
    """Append ``-ed`` to a base verb (irregular verbs are not handled)."""
    if verb.endswith("e"):
        return verb + "d"
    if len(verb) > 1 and verb.endswith("y") and verb[-2] not in "aeiou":
        return verb[:-1] + "ied"
    return verb + "ed"


def _wordnet_pos(token):
    """Map a spaCy token's coarse POS to a WordNet POS, or ``None``."""
    return {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }.get(token.pos_)


def predict_en(text):
    """Classify *text* with the AI-detection pipeline.

    Args:
        text: English text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the pipeline's first result.
    """
    # Truncate instead of failing on inputs longer than the model accepts.
    res = pipeline_en(text, truncation=True)[0]
    return res['label'], res['score']


def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset of *word*.

    Args:
        word: Word to look up.
        pos: WordNet part-of-speech constant, or ``None`` for any.

    Returns:
        A list of lemma names (multiword names use underscores), or an empty
        list when WordNet has no synset for the word.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if synsets:
        lemmas = synsets[0].lemmas()
        return [lemma.name() for lemma in lemmas]
    return []


def remove_redundant_words(text):
    """Drop filler words ("really", "very", "just", ...) from *text*."""
    doc = nlp(text)
    return _join_tokens(
        (token.text, token.whitespace_)
        for token in doc
        if token.text.lower() not in _FILLER_WORDS
    )


def capitalize_sentences_and_nouns(text):
    """Capitalise the first token of every sentence and every proper noun.

    Only the first character is changed, so acronyms and mixed-case names
    keep the rest of their spelling.
    """
    doc = nlp(text)
    pieces = []
    for sent in doc.sents:
        for token in sent:
            if token.i == sent.start or token.pos_ == "PROPN":
                word = _upper_first(token.text)
            else:
                word = token.text
            pieces.append((word, token.whitespace_))
    return _join_tokens(pieces)


def force_first_letter_capital(text):
    """Upper-case the first letter of the text and of each new sentence.

    A sentence start is the beginning of the text or a ``.``, ``!`` or ``?``
    followed by whitespace. Abbreviations such as "e.g. " are therefore
    treated as sentence ends too.
    """
    return _SENTENCE_START_RE.sub(
        lambda match: match.group(1) + match.group(2).upper(), text
    )


def correct_tense_errors(text):
    """Replace verbs used as auxiliaries with their WordNet base form.

    Only tokens tagged ``VERB`` whose dependency label is ``aux`` or
    ``auxpass`` are touched; everything else is copied through.
    NOTE(review): spaCy models usually tag auxiliaries as ``AUX``, so this
    presumably fires rarely - confirm the intended condition.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        word = token.text
        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
            word = wordnet.morphy(token.text, wordnet.VERB) or token.text
        pieces.append((word, token.whitespace_))
    return _join_tokens(pieces)


def correct_singular_plural_errors(text):
    """Fix noun number so that it agrees with the noun's own modifiers.

    A singular noun modified by "many", "several" or "few" is pluralised; a
    plural noun modified by "a" or "one" (and by no plural quantifier) is
    reduced to its lemma.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        word = token.text
        if token.pos_ == "NOUN":
            # Determiners and quantifiers attach to the noun itself, so look
            # at the noun's children rather than at its siblings.
            modifiers = {child.text.lower() for child in token.children}
            plural_cue = not modifiers.isdisjoint(_PLURAL_QUANTIFIERS)
            singular_cue = not modifiers.isdisjoint(_SINGULAR_DETERMINERS)
            if token.tag_ == "NN" and plural_cue:
                word = _match_case(token.text, _add_s(token.lemma_))
            elif token.tag_ == "NNS" and singular_cue and not plural_cue:
                word = _match_case(token.text, token.lemma_)
        pieces.append((word, token.whitespace_))
    return _join_tokens(pieces)


def correct_article_errors(text):
    """Swap "a" and "an" according to the first letter of the next word.

    The choice is made on spelling (a, e, i, o, u), not pronunciation, so
    words such as "hour" or "university" are not handled correctly.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        word = token.text
        article = word.lower()
        # The bounds check avoids an IndexError for a trailing article.
        if (
            article in ("a", "an")
            and token.pos_ == "DET"
            and token.i + 1 < len(doc)
        ):
            first_char = doc[token.i + 1].text[0]
            if first_char.isalpha():
                starts_with_vowel = first_char.lower() in "aeiou"
                if article == "a" and starts_with_vowel:
                    word = _match_case(word, "an")
                elif article == "an" and not starts_with_vowel:
                    word = _match_case(word, "a")
        pieces.append((word, token.whitespace_))
    return _join_tokens(pieces)


def replace_with_synonym(token):
    """Return a WordNet synonym for a spaCy *token*, inflected like the token.

    The first single-word lemma of the first synset of the token's lemma is
    used. The original text is returned when the token is not a noun, verb,
    adjective or adverb, when WordNet offers no usable synonym, or when the
    synonym is the token's own lemma (so irregular forms are left intact).
    Inflection uses simple suffix rules and does not know irregular verbs.
    """
    pos = _wordnet_pos(token)
    if pos is None:
        return token.text

    # Multiword lemma names contain underscores and cannot be inflected by
    # appending a suffix, so they are skipped.
    synonym = next(
        (
            name
            for name in get_synonyms_nltk(token.lemma_, pos)
            if "_" not in name
        ),
        None,
    )
    if synonym is None or synonym.lower() == token.lemma_.lower():
        return token.text

    if token.tag_ == "VBG":  # Present participle (e.g., running)
        synonym = _add_ing(synonym)
    elif token.tag_ in ("VBD", "VBN"):  # Past tense or past participle
        synonym = _add_ed(synonym)
    elif token.tag_ == "VBZ":  # Third-person singular present
        synonym = _add_s(synonym)
    elif token.tag_ == "NNS" and not synonym.endswith("s"):  # Plural noun
        synonym = _add_s(synonym)
    return _match_case(token.text, synonym)


def correct_double_negatives(text):
    """Collapse a "not ... never" double negative into "always".

    When "not" and "never" depend on the same head, "not" becomes "always"
    and the "never" token is removed.
    """
    doc = nlp(text)
    replace_at = set()
    drop_at = set()
    for token in doc:
        if token.text.lower() != "not":
            continue
        never_positions = [
            child.i
            for child in token.head.children
            if child.text.lower() == "never"
        ]
        if never_positions:
            replace_at.add(token.i)
            drop_at.update(never_positions)

    pieces = []
    for token in doc:
        if token.i in drop_at:
            continue
        word = "always" if token.i in replace_at else token.text
        pieces.append((word, token.whitespace_))
    return _join_tokens(pieces)


def ensure_subject_verb_agreement(text):
    """Make present-tense verbs agree in number with a noun subject.

    A singular noun subject with a non-third-person present verb gets the
    ``-s`` form; a plural noun subject with a third-person verb gets the base
    form. The verb is rewritten in place. Coordinated subjects ("the dog and
    the cat") are left alone.
    """
    doc = nlp(text)
    fixes = {}
    for token in doc:
        verb = token.head
        if token.dep_ != "nsubj" or verb.pos_ != "VERB":
            continue
        if any(child.dep_ == "conj" for child in token.children):
            continue
        if token.tag_ == "NN" and verb.tag_ == "VBP":
            fixes[verb.i] = _match_case(verb.text, _add_s(verb.lemma_))
        elif token.tag_ == "NNS" and verb.tag_ == "VBZ":
            fixes[verb.i] = _match_case(verb.text, verb.lemma_)
    return _join_tokens(
        (fixes.get(token.i, token.text), token.whitespace_) for token in doc
    )


def _correct_chunk(match):
    """Spell-check one whitespace-delimited chunk, keeping its punctuation."""
    chunk = match.group(0)
    parts = _WORD_CHUNK_RE.fullmatch(chunk)
    if parts is None:
        # Contractions, hyphenated words, URLs, ...: leave untouched.
        return chunk
    prefix, core, suffix = parts.groups()
    if not core.isalpha():
        return chunk
    fixed = spell.correction(core)
    if not fixed or fixed.lower() == core.lower():
        # No suggestion, or the word is already spelled correctly.
        return chunk
    return prefix + _match_case(core, fixed) + suffix


def correct_spelling(text):
    """Spell-check every plain word in *text*.

    Surrounding punctuation, capitalisation and whitespace are preserved;
    chunks that are not a single alphabetic word are left as they are.
    """
    return re.sub(r"\S+", _correct_chunk, text)


def correct_punctuation(text):
    """Normalise whitespace around punctuation marks.

    Whitespace before ``? . ! , ; :`` is removed and whitespace after those
    marks (and after a double quote) is reduced to a single space. Space in
    front of a double quote is kept so opening quotes stay separated from the
    preceding word.
    """
    text = re.sub(r'\s+([?.!,;:])', r'\1', text)  # Remove space before punctuation
    text = re.sub(r'([?.!,";:])\s+', r'\1 ', text)  # Ensure a single space after punctuation
    return text


def handle_possessives(text):
    """Re-attach a detached possessive clitic ("John 's" -> "John's")."""
    return re.sub(r"\b(\w+)\s+'s\b", r"\1's", text)


def rephrase_with_synonyms(text):
    """Replace nouns, verbs, adjectives and adverbs with WordNet synonyms."""
    doc = nlp(text)
    return _join_tokens(
        (replace_with_synonym(token), token.whitespace_) for token in doc
    )


def paraphrase_and_correct(text):
    """Run the full clean-up pipeline on *text* and return the result.

    Stages, in order: filler-word removal, capitalisation, tense, noun
    number, articles, double negatives, subject-verb agreement, spelling,
    punctuation, possessives, synonym replacement and a final sentence-start
    capitalisation pass.
    """
    # Remove meaningless or redundant words first
    cleaned_text = remove_redundant_words(text)

    # Capitalize sentences and nouns
    paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)

    # Correct tense and singular/plural errors
    paraphrased_text = correct_tense_errors(paraphrased_text)
    paraphrased_text = correct_singular_plural_errors(paraphrased_text)
    paraphrased_text = correct_article_errors(paraphrased_text)
    paraphrased_text = correct_double_negatives(paraphrased_text)
    paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)

    # Correct spelling and punctuation
    paraphrased_text = correct_spelling(paraphrased_text)
    paraphrased_text = correct_punctuation(paraphrased_text)
    paraphrased_text = handle_possessives(paraphrased_text)

    # Rephrase with synonyms
    paraphrased_text = rephrase_with_synonyms(paraphrased_text)

    # Force capitalization of the first letter of each sentence
    return force_first_letter_capital(paraphrased_text)


def process_text(input_text):
    """Gradio callback: classify *input_text* and return a corrected version.

    Returns:
        A ``(label, score, corrected_text)`` tuple matching the interface's
        three outputs.
    """
    ai_label, ai_score = predict_en(input_text)
    corrected_text = paraphrase_and_correct(input_text)
    return ai_label, ai_score, corrected_text


# Create Gradio interface
iface = gr.Interface(
    fn=process_text,
    inputs="text",
    outputs=["text", "number", "text"],
    title="AI Content Detection and Grammar Correction",
    description="Enter text to detect AI-generated content and correct grammar."
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()