Spaces:
Running
Running
File size: 3,381 Bytes
84669bc 7feda08 29edf23 7feda08 6ba2176 7fc55d1 7feda08 936bfca 7fc55d1 6ba2176 7feda08 c93f011 29edf23 c93f011 5065a5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import spacy
import subprocess
import nltk
from nltk.corpus import wordnet
from gensim import downloader as api
# Ensure necessary NLTK data is downloaded
nltk.download('wordnet')
nltk.download('omw-1.4')
# Ensure the spaCy model is installed
try:
nlp = spacy.load("en_core_web_sm")
except OSError:
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
nlp = spacy.load("en_core_web_sm")
# Load a smaller Word2Vec model from Gensim's pre-trained models
word_vectors = api.load("glove-wiki-gigaword-50")
# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
# AI detection function using DistilBERT
def detect_ai_generated(text):
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
with torch.no_grad():
outputs = model(**inputs)
probabilities = torch.softmax(outputs.logits, dim=1)
ai_probability = probabilities[0][1].item() # Probability of being AI-generated
return f"AI-Generated Content Probability: {ai_probability:.2f}%"
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
synsets = wordnet.synsets(word, pos=pos)
if synsets:
lemmas = synsets[0].lemmas()
return [lemma.name() for lemma in lemmas]
return []
# Paraphrasing function using spaCy and NLTK (without grammar correction)
def paraphrase_with_spacy_nltk(text):
doc = nlp(text)
paraphrased_words = []
for token in doc:
# Map spaCy POS tags to WordNet POS tags
pos = None
if token.pos_ in {"NOUN"}:
pos = wordnet.NOUN
elif token.pos_ in {"VERB"}:
pos = wordnet.VERB
elif token.pos_ in {"ADJ"}:
pos = wordnet.ADJ
elif token.pos_ in {"ADV"}:
pos = wordnet.ADV
synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
# Replace with a synonym only if it makes sense
if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
paraphrased_words.append(synonyms[0])
else:
paraphrased_words.append(token.text)
# Join the words back into a sentence
paraphrased_sentence = ' '.join(paraphrased_words)
return paraphrased_sentence
# Gradio interface definition
with gr.Blocks() as interface:
with gr.Row():
with gr.Column():
text_input = gr.Textbox(lines=5, label="Input Text")
detect_button = gr.Button("AI Detection")
paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK")
with gr.Column():
output_text = gr.Textbox(label="Output")
detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
# Launch the Gradio app
interface.launch(debug=False)
|