# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import nltk
from gensim.models import KeyedVectors
from nltk.tokenize import word_tokenize

# Download the NLTK tokenizer data required by word_tokenize (if not already downloaded)
nltk.download('punkt')

# Load pretrained Word2Vec embeddings with Gensim; `limit` caps the vocabulary to reduce memory use
word_vectors = KeyedVectors.load_word2vec_format('path/to/GoogleNews-vectors-negative300.bin.gz', binary=True, limit=100000)  # Adjust path as needed

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a DistilBERT classifier and tokenizer from Hugging Face
# (note: this checkpoint is fine-tuned for SST-2 sentiment and is used here as a stand-in AI detector)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

# Function to get synonyms using Gensim Word2Vec
def get_synonyms_gensim(word):
    try:
        synonyms = word_vectors.most_similar(positive=[word], topn=5)
        return [synonym[0] for synonym in synonyms]
    except KeyError:
        return []

# Naive paraphrasing function: replaces each word with its nearest Word2Vec neighbor when one exists
def paraphrase_text(text):
    words = word_tokenize(text)
    paraphrased_words = []
    for word in words:
        synonyms = get_synonyms_gensim(word.lower())
        if synonyms:
            paraphrased_words.append(synonyms[0])
        else:
            paraphrased_words.append(word)
    return ' '.join(paraphrased_words)

# AI detection function using DistilBERT
def detect_ai_generated(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # Class-1 probability, treated here as the AI-generated score
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

# Gradio interface definition
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase Text")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
    paraphrase_button.click(paraphrase_text, inputs=text_input, outputs=output_text)

# Launch the Gradio app
interface.launch(debug=False)
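
# Optional sanity check (an illustrative addition, not part of the original app):
# the two handlers can be exercised directly from a Python shell before serving
# the UI, for example:
#
#   sample = "Artificial intelligence is transforming modern software."
#   print(detect_ai_generated(sample))  # prints a probability string
#   print(paraphrase_text(sample))      # prints the word-by-word paraphrase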