Spaces:
Running
Running
File size: 4,052 Bytes
29edf23 84669bc 29edf23 23a08cd 936bfca 29edf23 936bfca 23a08cd c93f011 29edf23 c93f011 936bfca 29edf23 c93f011 35244e7 23a08cd 29edf23 c93f011 29edf23 10dc1f6 29edf23 84669bc 29edf23 c93f011 29edf23 ada2d1a 23a08cd 29edf23 84ec915 23a08cd 29edf23 84ec915 99b3c08 84669bc 29edf23 23a08cd 84669bc 776fa07 84669bc 29edf23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# Import dependencies
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
import torch
import nltk
import spacy
from nltk.corpus import wordnet
# Download the NLTK data this app relies on (a no-op when already cached).
# 'punkt' and 'stopwords' are kept from the original setup. 'wordnet' backs
# the `nltk.corpus.wordnet` lookups used for synonym replacement; without it
# the first synset query raises LookupError on a fresh machine. 'omw-1.4' is
# fetched as well because some NLTK releases need it to load WordNet.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
# Load spaCy model for English
nlp = spacy.load("en_core_web_sm")
# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the classifier (and its tokenizer) that the app uses as its "AI detector".
# NOTE(review): this checkpoint is DistilBERT fine-tuned on SST-2, i.e. a
# sentiment model, so its class 1 is presumably POSITIVE sentiment rather than
# "AI-generated" -- swap in a real detector checkpoint if that is the intent.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
# Load SRDdev Paraphrase model and tokenizer for humanizing text
paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase").to(device)
# Function to find synonyms using WordNet via NLTK
def get_synonyms(word):
    """Return WordNet synonyms for *word* in a stable order.

    Args:
        word: The word to look up in WordNet.

    Returns:
        A list of distinct synonym strings, in the order WordNet yields them
        (synset by synset, lemma by lemma). Multi-word lemmas use spaces
        instead of WordNet's underscores, and entries equal to *word* itself
        (ignoring case) are left out. The list is empty when WordNet offers
        no alternative.
    """
    target = word.casefold()
    # Dict keys keep insertion order, so de-duplicating through a dict makes
    # the result -- and in particular element 0, which callers pick --
    # reproducible. A set of strings iterates in a per-process random order.
    ordered = {}
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            candidate = lemma.name().replace("_", " ")
            # A word is not a useful synonym of itself.
            if candidate.casefold() != target:
                ordered[candidate] = None
    return list(ordered)
# Replace words with synonyms using spaCy and WordNet
def replace_with_synonyms(text):
    """Swap nouns, verbs, adjectives and adverbs for a WordNet synonym.

    The text is tokenized with the module-level spaCy pipeline. Each token
    tagged NOUN, VERB, ADJ or ADV is replaced by the first entry that
    ``get_synonyms`` returns for its lower-cased form; title-cased tokens get
    a capitalized replacement. All other tokens are kept verbatim.

    Args:
        text: The input text.

    Returns:
        The rewritten text, with the original spacing between tokens kept.
    """
    replaceable_pos = {"NOUN", "VERB", "ADJ", "ADV"}
    pieces = []
    for token in nlp(text):
        replacement = token.text
        # Test the cheap POS condition first so WordNet is only queried for
        # tokens that are actually eligible for replacement.
        if token.pos_ in replaceable_pos:
            synonyms = get_synonyms(token.text.lower())
            if synonyms:
                replacement = synonyms[0]  # Replace with the first synonym
                if token.is_title:
                    replacement = replacement.capitalize()
        # Re-attach the token's own trailing whitespace instead of joining on
        # " ", which would put spaces before punctuation and inside
        # contractions ("world ." / "do n't").
        pieces.append(replacement + token.whitespace_)
    return "".join(pieces)
# Score a text with the module-level sequence classifier
def detect_ai_generated(text):
    """Return the classifier's softmax probability for class index 1.

    The text is tokenized (truncated to at most 512 tokens), moved to the
    configured device and run through the module-level ``model`` without
    gradient tracking.

    NOTE(review): callers treat this value as "probability of being
    AI-generated", but the checkpoint loaded at module level is an SST-2
    sentiment model, so class 1 is presumably POSITIVE sentiment -- confirm.

    Args:
        text: The text to score.

    Returns:
        A Python float in [0, 1].
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    class_probs = torch.softmax(logits, dim=1)
    # Row 0 is the single input sequence; column 1 is the second class.
    return class_probs[0][1].item()
# Humanize the text by paraphrasing it with the SRDdev Paraphrase model
def humanize_text(AI_text):
    """Paraphrase *AI_text* line by line with the T5 paraphrase model.

    The input is split on single newlines. Lines that are empty or contain
    only whitespace are dropped; every remaining line is paraphrased on its
    own and the results are joined with a blank line between them.

    Args:
        AI_text: The text to paraphrase.

    Returns:
        The paraphrased lines joined by ``"\\n\\n"``; an empty string when the
        input has no non-blank line.
    """
    def _rewrite(chunk):
        # Encode one line; anything beyond 512 tokens is truncated.
        encoded = paraphrase_tokenizer(chunk, return_tensors="pt", max_length=512, truncation=True).to(device)
        token_ids = encoded['input_ids']
        generated = paraphrase_model.generate(
            token_ids,
            max_length=token_ids.shape[-1] + 20,  # Slightly more than the original input length
            num_beams=4,
            early_stopping=True,
            length_penalty=1.0,
            no_repeat_ngram_size=3,
        )
        # Only the first returned sequence is decoded.
        return paraphrase_tokenizer.decode(generated[0], skip_special_tokens=True)

    return "\n\n".join(
        _rewrite(line) for line in AI_text.split("\n") if line.strip()
    )
# Main function to handle the overall process
def main_function(AI_text):
    """Run the full pipeline and format the result for the UI.

    Steps: replace words with synonyms, score the rewritten text with
    ``detect_ai_generated``, then paraphrase the rewritten text with
    ``humanize_text``.

    Args:
        AI_text: The text entered by the user.

    Returns:
        A string containing the score as a percentage followed by the
        humanized text.
    """
    # Replace words with synonyms
    text_with_synonyms = replace_with_synonyms(AI_text)
    # Score the synonym-replaced text (not the raw input)
    ai_probability = detect_ai_generated(text_with_synonyms)
    # Humanize AI text
    humanized_text = humanize_text(text_with_synonyms)
    # The score is a probability in [0, 1]; the "%" format type multiplies by
    # 100, so 0.93 is shown as "93.00%" rather than the misleading "0.93%".
    return f"AI-Generated Content: {ai_probability:.2%}\n\nHumanized Text:\n{humanized_text}"
# User-facing texts shown by the Gradio app
_APP_TITLE = "AI Text Humanizer with Synonym Replacement"
_APP_DESCRIPTION = (
    "Enter AI-generated text and get a human-written version, "
    "with synonyms replaced for more natural output. "
    "This space uses models from Hugging Face directly."
)
# Gradio interface definition: one textbox in, one textbox out, wired to
# main_function
interface = gr.Interface(
    fn=main_function,
    inputs="textbox",
    outputs="textbox",
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
)
# Launch the Gradio app (runs at import time, with debug mode enabled)
interface.launch(debug=True)
|