huamnifierWithSimpleGrammer

Running

File size: 3,765 Bytes

84669bc
7feda08
29edf23
7feda08
6ba2176
7fc55d1
 
545aa42
6ba2176
7feda08
936bfca
7fc55d1
 
 
6ba2176
 
 
 
 
 
 
7feda08
 
 
 
c93f011
 
 
29edf23
 
c93f011
936bfca
545aa42
 
 
 
 
e1bbde5
30196dc
 
 
 
 
b3aee5e
4d1390a
 
ea28e08
7fc55d1
 
 
 
 
 
 
7feda08
545aa42
7fc55d1
7feda08
 
7fc55d1
7feda08
7fc55d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7feda08
 
 
7fc55d1
 
 
e1bbde5
545aa42
 
e1bbde5
 
6b18ba5
7feda08
 
 
 
 
 
e1bbde5
7feda08
 
4d1390a
7feda08
7fc55d1
776fa07
84669bc
4d1390a

import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from gingerit.gingerit import GingerIt
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Download the WordNet corpus and its companion 'omw-1.4' package through NLTK
for _nltk_package in ('wordnet', 'omw-1.4'):
    nltk.download(_nltk_package)

# Ensure the spaCy model is installed: try to load it, and if loading fails
# with OSError, download the model once and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Invoke the interpreter that is running this script (sys.executable)
    # rather than whatever "python" resolves to on PATH, so the model is
    # installed into the same environment. check=True makes a failed download
    # raise here instead of surfacing as a second, less obvious load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Pre-trained embeddings ("glove-wiki-gigaword-50") fetched via Gensim's downloader
word_vectors = api.load("glove-wiki-gigaword-50")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Tokenizer and sequence-classification model used by detect_ai_generated.
# NOTE(review): the checkpoint name points at an SST-2 sentiment fine-tune of
# DistilBERT rather than an AI-text detector -- confirm this is intended.
_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
model = model.to(device)

# Initialize Gingerit for grammar correction
def correct_grammar_with_gingerit(text):
    """Run *text* through GingerIt and return the corrected string.

    A new ``GingerIt`` parser is created on every call; the value stored
    under the ``'result'`` key of its ``parse`` output is returned.
    """
    return GingerIt().parse(text)['result']

# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Score *text* with the module-level classifier and format the result.

    Args:
        text: Input string. It is tokenized with truncation at 512 tokens.

    Returns:
        A string of the form ``"AI-Generated Content Probability: NN.NN%"``,
        where the number is the softmax probability of class index 1
        expressed as a percentage.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**inputs).logits
        probabilities = torch.softmax(logits, dim=1)
    # Row 0 is the only sequence in the batch; column 1 is the class this
    # function reports as "AI-generated".
    ai_probability = probabilities[0][1].item()
    # softmax yields a value in [0, 1]; the ".2%" format scales it by 100 so
    # the printed number matches the trailing percent sign.
    return f"AI-Generated Content Probability: {ai_probability:.2%}"

# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for *word*.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech value passed through to ``wordnet.synsets``.

    Returns:
        A list of lemma names from the first matching synset, or an empty
        list when the lookup yields no synsets.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    first_synset = matches[0]
    return [entry.name() for entry in first_synset.lemmas()]

# Paraphrasing function using spaCy and NLTK with Gingerit grammar correction
def paraphrase_with_spacy_nltk(text):
    """Paraphrase *text* by swapping words for WordNet synonyms.

    Every token that spaCy tags as NOUN, VERB, ADJ or ADV is replaced by the
    first lemma of its first WordNet synset, unless that lemma is the
    (lower-cased) word itself. All other tokens are kept as they are. The
    rebuilt text is then sent through GingerIt for grammar correction.

    Args:
        text: The text to paraphrase.

    Returns:
        The grammar-corrected paraphrase, or an empty string when *text* is
        empty or contains only whitespace.
    """
    # Nothing to paraphrase: skip the NLP pipeline and the grammar service.
    if not text or not text.strip():
        return ""

    # spaCy coarse POS tag -> WordNet POS constant. Tokens with any other tag
    # are never replaced.
    pos_map = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    pieces = []
    for token in nlp(text):
        lowered = token.text.lower()
        wordnet_pos = pos_map.get(token.pos_)
        synonyms = get_synonyms_nltk(lowered, wordnet_pos) if wordnet_pos else []

        # Replace only when the candidate actually differs from the word.
        if synonyms and synonyms[0] != lowered:
            # WordNet writes multi-word lemmas with underscores; turn them
            # back into spaces so they read as normal text.
            replacement = synonyms[0].replace("_", " ")
        else:
            replacement = token.text

        # Re-attach the token's own trailing whitespace instead of joining
        # everything with single spaces, so punctuation and contractions keep
        # their original spacing.
        pieces.append(replacement + token.whitespace_)

    paraphrased_sentence = "".join(pieces)

    # Correct the grammar of the paraphrased sentence using Gingerit
    return correct_grammar_with_gingerit(paraphrased_sentence)

# Gradio interface definition
# Layout: one Row holding two Columns. The first column contains the input
# textbox and the two action buttons; the second contains the output textbox.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK (Grammar Corrected)")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons take text_input as their input and send the handler's
    # return value to the same output_text box.
    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)

# Launch the Gradio app (runs at module level, i.e. as soon as this file is executed)
interface.launch(debug=False)