# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
import torch
import nltk

# Download NLTK data (not used directly below; kept in case sentence-level preprocessing is added)
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the classifier used as the "AI detector". Note: this SST-2 checkpoint is a
# binary sentiment model, so its score is a stand-in rather than a calibrated
# AI-detection probability.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
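# A hedged alternative (assumption: the checkpoint below is available on the Hub and
# its label order matches the indexing used in detect_ai_generated): a purpose-trained
# detector such as the community GPT-2 output detector could be dropped in instead.
# tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
# model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector").to(device)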

# Load SRDdev Paraphrase model and tokenizer for humanizing text
paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase").to(device)

# AI detection function using DistilBERT
def detect_ai_generated(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # Score of label index 1 (POSITIVE for SST-2), treated here as the "AI" probability
    return ai_probability
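
# Example usage (commented out): returns a score in [0, 1].
# print(detect_ai_generated("This text was written by a large language model."))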

# Humanize the AI-detected text using the SRDdev Paraphrase model
def humanize_text(AI_text):
    paragraphs = AI_text.split("\n")
    paraphrased_paragraphs = []
    for paragraph in paragraphs:
        if paragraph.strip():
            inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True).to(device)
            paraphrased_ids = paraphrase_model.generate(
                inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=inputs['input_ids'].shape[-1] + 20,  # Allow slightly more room than the input
                num_beams=4,
                early_stopping=True,
                length_penalty=1.0,
                no_repeat_ngram_size=3,
            )
            paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
            paraphrased_paragraphs.append(paraphrased_text)
    return "\n\n".join(paraphrased_paragraphs)

# Main function to handle the overall process
def main_function(AI_text):
    ai_probability = detect_ai_generated(AI_text)
    
    # Humanize AI text
    humanized_text = humanize_text(AI_text)
    
    return f"AI-Generated Content: {ai_probability:.2f}%\n\nHumanized Text:\n{humanized_text}"

# Gradio interface definition
interface = gr.Interface(
    fn=main_function,
    inputs="textbox",
    outputs="textbox",
    title="AI Text Humanizer",
    description="Enter AI-generated text and get a human-written version. This space uses models from Hugging Face directly."
)

# Launch the Gradio app
interface.launch(debug=True)
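
# To run locally (assuming these dependencies; sentencepiece is required by T5Tokenizer):
#   pip install gradio transformers torch nltk sentencepiece
#   python app.py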