huamnifierWithSimpleGrammer

Running

File size: 4,358 Bytes

84669bc
17f790c
29edf23
7feda08
6ba2176
7fc55d1
 
7feda08
8e09e8c
7fc55d1
 
 
6ba2176
 
 
 
 
 
 
7feda08
 
 
 
c93f011
 
 
29edf23
17f790c
 
 
 
 
 
5065a5b
2ff4e71
 
17f790c
2ff4e71
17f790c
2ff4e71
 
 
 
5065a5b
 
 
 
 
 
 
 
7e4465c
5065a5b
 
 
 
 
73ae45e
5065a5b
 
 
 
 
 
 
 
 
 
 
 
73ae45e
5065a5b
 
 
 
 
73ae45e
5065a5b
73ae45e
a3485f7
 
17f790c
73ae45e
17f790c
 
 
 
 
a3485f7
73ae45e
 
 
a3485f7
 
73ae45e
 
5065a5b
73ae45e
5065a5b
 
 
 
 
 
 
73ae45e
5065a5b
 
 
 
73ae45e
5065a5b

import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM

# Ensure necessary NLTK data is downloaded (WordNet plus the Open Multilingual
# WordNet data it depends on for lookups).
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy model is installed: spacy.load raises OSError when the model
# package is missing, in which case it is downloaded once and loaded again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the interpreter that is running this file so the model is installed
    # into the same environment (a bare "python" may be a different interpreter
    # or not exist at all). check=True makes a failed download raise here
    # instead of showing up as a second, less obvious OSError on the retry.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load the small 50-dimensional GloVe vectors from Gensim's pre-trained models.
# NOTE(review): no function visible in this file reads word_vectors; confirm it
# is still needed before paying its download and memory cost at startup.
word_vectors = api.load("glove-wiki-gigaword-50")

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint names are kept in one place so tokenizer and model cannot drift.
# NOTE(review): the "detector" checkpoint is an SST-2 sentiment classifier, not
# a model trained to recognize AI-generated text; confirm the intended model.
AI_DETECTOR_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
GRAMMAR_CHECKPOINT = "pszemraj/flan-t5-large-grammar-synthesis"

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
tokenizer_ai = AutoTokenizer.from_pretrained(AI_DETECTOR_CHECKPOINT)
model_ai = AutoModelForSequenceClassification.from_pretrained(AI_DETECTOR_CHECKPOINT).to(device)

# Load the grammar correction model
tokenizer_gc = AutoTokenizer.from_pretrained(GRAMMAR_CHECKPOINT)
model_gc = AutoModelForSeq2SeqLM.from_pretrained(GRAMMAR_CHECKPOINT).to(device)

# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Score *text* with the module-level classifier and format the result.

    The text is tokenized (truncated to 512 tokens), passed through
    ``model_ai`` with gradient tracking disabled, and the softmax probability
    of class index 1 is reported.

    NOTE(review): ``model_ai`` is loaded from
    ``distilbert-base-uncased-finetuned-sst-2-english``, a sentiment
    checkpoint, so class index 1 is presumably that model's positive-sentiment
    label rather than an "AI-generated" class. Confirm the intended model.

    Args:
        text: The string to classify.

    Returns:
        A display string such as ``"AI-Generated Content Probability: 97.35%"``.
    """
    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()
    # softmax yields a fraction in [0, 1]; the "%" presentation type multiplies
    # by 100 so the number shown actually matches the percent sign. The old
    # ":.2f" + "%" rendered 0.97 as "0.97%".
    return f"AI-Generated Content Probability: {ai_probability:.2%}"

# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for *word*.

    Args:
        word: The word to look up.
        pos: The WordNet part-of-speech constant used to filter synsets.

    Returns:
        A list with the name of every lemma in the first matching synset, in
        WordNet's order, or an empty list when no synset matches.
    """
    matching_synsets = wordnet.synsets(word, pos=pos)
    if not matching_synsets:
        return []

    # Only the first synset returned by WordNet is consulted.
    first_sense = matching_synsets[0]
    lemma_names = []
    for lemma in first_sense.lemmas():
        lemma_names.append(lemma.name())
    return lemma_names

# Paraphrasing function using spaCy and NLTK (without grammar correction)
def paraphrase_with_spacy_nltk(text):
    """Paraphrase *text* by swapping content words for WordNet synonyms.

    Each token tagged by spaCy as a noun, verb, adjective or adverb is looked
    up with ``get_synonyms_nltk``; when the first synonym differs from the
    lower-cased token, it replaces the token. All other tokens are kept as-is.
    No grammar correction is applied here.

    The result is rebuilt from each token's own trailing whitespace rather
    than joined with single spaces, so punctuation and contractions keep their
    original spacing ("Hello, world." instead of "Hello , world .").

    Args:
        text: The input string to paraphrase.

    Returns:
        The paraphrased string.
    """
    # spaCy coarse POS tag -> WordNet POS constant. Tokens with any other tag
    # are never replaced.
    spacy_to_wordnet = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    pieces = []
    for token in nlp(text):
        replacement = token.text
        pos = spacy_to_wordnet.get(token.pos_)
        if pos is not None:
            lowered = token.text.lower()
            synonyms = get_synonyms_nltk(lowered, pos)
            # Replace only when the top synonym is actually a different word.
            if synonyms and synonyms[0] != lowered:
                # WordNet joins multi-word lemmas with underscores.
                replacement = synonyms[0].replace("_", " ")
                # Keep an initial capital (e.g. at the start of a sentence).
                if token.text[:1].isupper():
                    replacement = replacement[:1].upper() + replacement[1:]
        # whitespace_ is the whitespace that followed this token in the input.
        pieces.append(replacement + token.whitespace_)

    return "".join(pieces)

# Grammar correction function using the T5 model
def correct_grammar(text):
    """Return a grammar-corrected version of *text* using the seq2seq model.

    The text is tokenized (truncated to 512 tokens), decoded with 5-beam
    search up to 512 tokens, and the best sequence is returned with special
    tokens stripped.

    Args:
        text: The string to correct. ``None``, empty and whitespace-only
            input is returned without running the model.

    Returns:
        The corrected string; for blank input, the input itself (or ``""``
        when it is ``None``).
    """
    # Nothing to correct: skip the expensive beam search on an empty prompt.
    if not text or not text.strip():
        return text or ""

    inputs = tokenizer_gc(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        # Forward the full encoding so generate() receives the attention mask
        # together with input_ids, matching how detect_ai_generated calls its
        # model.
        outputs = model_gc.generate(**inputs, max_length=512, num_beams=5, early_stopping=True)
    return tokenizer_gc.decode(outputs[0], skip_special_tokens=True)

# Combined function: Paraphrase -> Grammar Check
def paraphrase_and_correct(text):
    """Paraphrase *text*, then grammar-correct the paraphrase.

    Args:
        text: The input string.

    Returns:
        The output of ``correct_grammar`` applied to the result of
        ``paraphrase_with_spacy_nltk(text)``.
    """
    # Paraphrasing runs first; its output is what gets grammar-corrected.
    return correct_grammar(paraphrase_with_spacy_nltk(text))

# Gradio interface definition
with gr.Blocks() as interface:
    # One row with two columns: input and action buttons in the first column,
    # the shared output box in the second.
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons read the same input box and write to the same output box,
    # so each click replaces whatever the other action last displayed.
    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)

# Launch the Gradio app (runs unconditionally when this module is executed
# or imported; debug mode is off).
interface.launch(debug=False)