"""Gradio demo: classifier-based "AI detection" and WordNet synonym paraphrasing."""

import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from nltk.corpus import wordnet
from textblob import TextBlob  # Import TextBlob for simple grammar correction
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy model is installed
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter so the model lands in the active
    # environment; a bare "python" may resolve to a different installation.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load a smaller Word2Vec model from Gensim's pre-trained models
# NOTE(review): nothing in this file reads `word_vectors`; the load is kept
# in case other code depends on it, but it costs startup time and memory.
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
# NOTE(review): the checkpoint name says it is fine-tuned on SST-2, which is a
# sentiment task; confirm it is really the intended AI-text detector.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(device)

# spaCy coarse POS tag -> WordNet POS constant. Tokens with any other tag are
# never replaced.
_SPACY_TO_WORDNET_POS = {
    "NOUN": wordnet.NOUN,
    "VERB": wordnet.VERB,
    "ADJ": wordnet.ADJ,
    "ADV": wordnet.ADV,
}


# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Return a message with the classifier's class-1 probability for *text*.

    Args:
        text: Input text; it is truncated to 512 tokens before scoring.

    Returns:
        A string reporting the class-1 softmax probability as a percentage,
        or a prompt asking for input when *text* is empty or whitespace.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze."

    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=512
    ).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
    # Softmax yields a value in [0, 1]; the "%" format type scales it by 100
    # so the figure matches the percent sign shown to the user.
    return f"AI-Generated Content Probability: {ai_probability:.2%}"


# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for *word*.

    Args:
        word: Word to look up.
        pos: WordNet part-of-speech constant restricting the lookup.

    Returns:
        A list of lemma names from the first matching synset, or an empty
        list when WordNet has no synset for the word.
    """
    synsets = wordnet.synsets(word, pos=pos)
    if not synsets:
        return []
    return [lemma.name() for lemma in synsets[0].lemmas()]


# Paraphrasing function using spaCy and NLTK (without grammar correction)
def paraphrase_with_spacy_nltk(text):
    """Replace nouns, verbs, adjectives and adverbs with a WordNet synonym.

    Each eligible token is swapped for the first lemma of its first synset
    when that lemma differs from the lower-cased token. All other tokens are
    copied through unchanged.

    Args:
        text: Text to paraphrase.

    Returns:
        The paraphrased text with the original inter-token spacing.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        lowered = token.text.lower()
        pos = _SPACY_TO_WORDNET_POS.get(token.pos_)
        synonyms = get_synonyms_nltk(lowered, pos) if pos else []

        replacement = token.text
        # Replace with a synonym only if it differs from the original word
        if synonyms and synonyms[0] != lowered:
            # WordNet joins multiword lemmas with underscores.
            replacement = synonyms[0].replace("_", " ")
            if token.is_title:
                # The lookup was lower-cased; restore the leading capital.
                replacement = replacement[:1].upper() + replacement[1:]

        # Re-attach the token's own trailing whitespace so punctuation and
        # contractions keep their original spacing.
        pieces.append(replacement + token.whitespace_)
    return "".join(pieces)


# Grammar correction function using TextBlob
def correct_grammar(text):
    """Return *text* after applying TextBlob's ``correct()``."""
    return str(TextBlob(text).correct())


# Combined function: Paraphrase -> Grammar Check
def paraphrase_and_correct(text):
    """Paraphrase *text*, then run the result through `correct_grammar`."""
    # Step 1: Paraphrase the text
    paraphrased_text = paraphrase_with_spacy_nltk(text)
    # Step 2: Apply grammar correction
    return correct_grammar(paraphrased_text)


# Gradio interface definition
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

# Launch the Gradio app
interface.launch(debug=False)