Spaces:
Running
Running
File size: 4,528 Bytes
84669bc 7feda08 29edf23 7feda08 6ba2176 7fc55d1 7feda08 a3485f7 936bfca 7fc55d1 a3485f7 6ba2176 7feda08 c93f011 29edf23 c93f011 5065a5b a3485f7 85e8aa6 2ff4e71 5065a5b a3485f7 5065a5b a3485f7 5065a5b a3485f7 85e8aa6 a3485f7 5065a5b a3485f7 5065a5b a3485f7 5065a5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import shutil
import subprocess
import sys

import gradio as gr
import language_tool_python
import nltk
import spacy
import torch
from gensim import downloader as api
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Install Java
def install_java():
    """Install a Java runtime with apt-get unless ``java`` is already on PATH.

    Runs ``apt-get update`` followed by ``apt-get install -y openjdk-11-jre``.
    A non-zero exit status from either command is reported with a printed
    warning instead of being silently ignored; the remaining command is still
    attempted, as before.

    Returns:
        None.
    """
    if shutil.which("java") is not None:
        # A java executable is already available, so skip the apt-get calls.
        return

    apt_commands = (
        ["apt-get", "update"],
        ["apt-get", "install", "-y", "openjdk-11-jre"],
    )
    for command in apt_commands:
        completed = subprocess.run(command)
        if completed.returncode != 0:
            print(
                f"Warning: {' '.join(command)!r} exited with status "
                f"{completed.returncode}"
            )


install_java()
# Ensure the NLTK data used by this file is downloaded:
#   - 'wordnet' and 'omw-1.4' back the WordNet synonym lookups,
#   - 'punkt' / 'punkt_tab' back nltk.sent_tokenize. Newer NLTK releases load
#     the 'punkt_tab' resource instead of the pickled 'punkt' one, so both are
#     requested to work across NLTK versions.
for nltk_resource in ("wordnet", "omw-1.4", "punkt", "punkt_tab"):
    nltk.download(nltk_resource)
# Ensure the spaCy model is installed: try to load it, and download it on the
# first failure before loading again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the interpreter that is running this script (sys.executable) so the
    # model is installed into the same environment that will import it, and
    # fail loudly (check=True) if the download itself does not succeed.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Load pre-trained word vectors through Gensim's downloader. The identifier
# "glove-wiki-gigaword-50" names a GloVe model (not Word2Vec).
# NOTE(review): no function visible in this file reads `word_vectors` — confirm
# it is still needed, since loading it adds start-up time and memory.
word_vectors = api.load("glove-wiki-gigaword-50")
# Check for GPU and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the tokenizer and sequence-classification model used by
# detect_ai_generated, and move the model to the selected device.
# NOTE(review): the checkpoint name ("...finetuned-sst-2-english") suggests an
# SST-2 sentiment classifier rather than an AI-text detector — confirm that its
# class index 1 really represents "AI-generated".
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
# Function to correct grammar using LanguageTool
# Lazily created LanguageTool instance shared by every grammar-correction call.
_language_tool = None


def correct_grammar_with_languagetool(text):
    """Return ``text`` with LanguageTool's suggested corrections applied.

    The ``LanguageTool('en-US')`` instance is created on first use and then
    reused, instead of constructing a new (never closed) instance on every
    call.

    Args:
        text: The text to check.

    Returns:
        The corrected text produced by ``language_tool_python.utils.correct``.
    """
    global _language_tool
    if _language_tool is None:
        _language_tool = language_tool_python.LanguageTool('en-US')
    matches = _language_tool.check(text)
    return language_tool_python.utils.correct(text, matches)
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Classify ``text`` with the module-level model and report class 1 as a percentage.

    The text is tokenized (truncated to 512 tokens), run through the model
    without gradient tracking, and the softmax probability of class index 1
    is formatted into the result string.

    NOTE(review): whether class index 1 means "AI-generated" depends on the
    checkpoint loaded at module level — confirm against that model's labels.

    Args:
        text: The text to classify.

    Returns:
        A string of the form ``"AI-Generated Content Probability: 97.12%"``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    probabilities = torch.softmax(logits, dim=1)
    ai_probability = probabilities[0][1].item()
    # The softmax output lies in [0, 1]; the ".2%" format scales it by 100 so
    # the printed number matches the percent sign (previously 0.97 was shown
    # as "0.97%").
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for ``word``.

    Args:
        word: The word to look up.
        pos: The WordNet part-of-speech tag passed to ``wordnet.synsets``.

    Returns:
        A list of lemma names from the first matching synset, or an empty
        list when WordNet returns no synsets.
    """
    matching_synsets = wordnet.synsets(word, pos=pos)
    if not matching_synsets:
        return []
    first_synset = matching_synsets[0]
    return [lemma.name() for lemma in first_synset.lemmas()]
# Paraphrasing function using spaCy and NLTK
def paraphrase_with_spacy_nltk(text):
    """Replace nouns, verbs, adjectives and adverbs with a WordNet synonym.

    Each token is tagged by the spaCy pipeline; for the four supported
    parts of speech the first lemma of the first WordNet synset replaces the
    token when it differs from the lower-cased token text. All other tokens
    are kept unchanged.

    Compared with joining tokens on a single space, the output keeps each
    token's original trailing whitespace (so no space appears before
    punctuation), turns the underscores of multi-word lemmas into spaces,
    and keeps an initial capital letter when the replaced token had one.

    Args:
        text: The text to paraphrase.

    Returns:
        The paraphrased text.
    """
    doc = nlp(text)
    # Map spaCy coarse POS tags to the WordNet POS constants.
    wordnet_pos_by_tag = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    pieces = []
    for token in doc:
        replacement = token.text
        wordnet_pos = wordnet_pos_by_tag.get(token.pos_)
        if wordnet_pos is not None:
            lowered = token.text.lower()
            synonyms = get_synonyms_nltk(lowered, wordnet_pos)
            if synonyms and synonyms[0] != lowered:
                # WordNet joins multi-word lemmas with underscores.
                replacement = synonyms[0].replace("_", " ")
                if token.text[:1].isupper():
                    # Keep the capitalisation of e.g. sentence-initial words.
                    replacement = replacement[:1].upper() + replacement[1:]
        # whitespace_ is the token's own trailing whitespace, so spacing
        # around punctuation is reproduced as written in the input.
        pieces.append(replacement + token.whitespace_)
    return "".join(pieces)
# Sentence structuring using NLTK
def structure_sentences(text):
    """Split ``text`` into sentences with NLTK and re-join them with spaces.

    Every sentence is currently passed through unchanged; the comprehension
    below is the place to apply per-sentence structuring rules.

    Args:
        text: The text to restructure.

    Returns:
        The sentences joined by a single space.
    """
    # Here you can apply any structuring rules or logic you need.
    kept_sentences = [sentence for sentence in nltk.sent_tokenize(text)]
    return ' '.join(kept_sentences)
# Combined function: Paraphrase -> Structure -> Grammar Check
def humanize_text(text):
    """Run ``text`` through the paraphrase, structuring and grammar stages.

    Args:
        text: The input text.

    Returns:
        The text after all three stages have been applied in order.
    """
    # Stage order: 1) paraphrase, 2) structure sentences, 3) grammar correction.
    pipeline = (
        paraphrase_with_spacy_nltk,
        structure_sentences,
        correct_grammar_with_languagetool,
    )
    current = text
    for stage in pipeline:
        current = stage(current)
    return current
# Gradio interface definition
with gr.Blocks() as interface:
    # Two-column layout: input and action buttons on the left, result on the right.
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5)
            detect_button = gr.Button("AI Detection")
            humanize_button = gr.Button("Humanize Text")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons read the same input box and write to the same output box.
    detect_button.click(
        fn=detect_ai_generated,
        inputs=text_input,
        outputs=output_text,
    )
    humanize_button.click(
        fn=humanize_text,
        inputs=text_input,
        outputs=output_text,
    )

# Launch the Gradio app
interface.launch(debug=False)
|