Spaces:
Running
Running
File size: 4,693 Bytes
84669bc ddf9006 29edf23 7feda08 6ba2176 7fc55d1 7feda08 9dbd21f 8e09e8c 7fc55d1 6ba2176 7feda08 c93f011 29edf23 41941cd 5065a5b 9dbd21f c3f5d2b 2ff4e71 41941cd 2ff4e71 41941cd 2ff4e71 5065a5b 3c39506 5065a5b 73ae45e 5065a5b 73ae45e 5065a5b 73ae45e 5065a5b 73ae45e 3c39506 5065a5b 73ae45e 5065a5b 9dbd21f ddf9006 9dbd21f ddf9006 41941cd ddf9006 c3f5d2b ddf9006 41941cd 5065a5b 3c39506 5065a5b 9fdd716 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import subprocess
import sys

import gradio as gr
import nltk
import spacy
import torch
from gensim import downloader as api
from gingerit.gingerit import GingerIt
from nltk.corpus import wordnet
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Make sure the WordNet corpora used for synonym lookup are available locally.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the spaCy English pipeline, installing it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Run the download with the interpreter executing this script: a bare
    # "python" may be absent from PATH or belong to a different environment.
    # check=True makes a failed download raise here instead of being ignored.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load the "glove-wiki-gigaword-50" GloVe vectors through Gensim's downloader.
word_vectors = api.load("glove-wiki-gigaword-50")

# Run the classifier on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and sequence-classification model used by
# detect_ai_generated.
# NOTE(review): the checkpoint name indicates a DistilBERT model fine-tuned on
# SST-2 (sentiment), not an AI-text detector -- confirm this is the intended
# model before trusting the reported "AI-generated" score.
tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

# Shared GingerIt client used by correct_grammar.
parser = GingerIt()
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Classify ``text`` and report the probability of class index 1.

    The text is tokenized (truncated to 512 tokens), passed through the
    module-level ``model_ai`` without gradient tracking, and the logits are
    turned into probabilities with a softmax over the class dimension.

    NOTE(review): whether class index 1 really means "AI-generated" depends on
    the checkpoint loaded into ``model_ai`` -- confirm against its label map.

    Args:
        text: The text to score.

    Returns:
        A message of the form ``"AI-Generated Content Probability: 93.12%"``.
    """
    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    ai_probability = probabilities[0][1].item()  # value in [0, 1]
    # The "%" presentation type multiplies by 100 before appending the sign,
    # so 0.9312 is shown as "93.12%" rather than "0.93%".
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
# Look up synonyms for a word in NLTK's WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for ``word``.

    Args:
        word: The word to look up.
        pos: Part-of-speech value forwarded to ``wordnet.synsets``.

    Returns:
        The lemma names of the first synset, in the order WordNet lists them,
        or an empty list when no synset is found.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    return [entry.name() for entry in matches[0].lemmas()]
# Function to capitalize the first letter of sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    The text is parsed with the module-level spaCy pipeline ``nlp``. The first
    token of every sentence and every token tagged ``PROPN`` gets its first
    character upper-cased; everything else, including the original spacing
    between tokens, is kept as it was.

    Args:
        text: The text to fix up.

    Returns:
        The text with the capitalization applied.
    """
    doc = nlp(text)
    pieces = []
    for sent in doc.sents:
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Only change the first character: str.capitalize() would also
                # lower-case the rest of the word ("NASA" -> "Nasa").
                word = word[:1].upper() + word[1:]
            # Re-attach the token's own trailing whitespace so punctuation and
            # contractions are not separated by inserted spaces.
            pieces.append(word + token.whitespace_)
    return ''.join(pieces)


# Paraphrasing function using spaCy and NLTK
def paraphrase_with_spacy_nltk(text):
    """Paraphrase ``text`` by swapping content words for a WordNet synonym.

    Every token tagged as a noun, verb, adjective or adverb is looked up
    (lower-cased) with ``get_synonyms_nltk``. When the first synonym returned
    differs from the lower-cased token, it replaces the token. The lookup uses
    the surface form and inserts the lemma name as returned, so the
    replacement is not re-inflected to match the original word.

    Args:
        text: The text to paraphrase.

    Returns:
        The paraphrased text after ``capitalize_sentences_and_nouns`` has been
        applied to it.
    """
    doc = nlp(text)
    # spaCy coarse POS tag -> WordNet POS constant; other tags are left alone.
    wordnet_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    pieces = []
    for token in doc:
        word = token.text
        pos = wordnet_pos.get(token.pos_)
        if pos:
            lowered = word.lower()
            synonyms = get_synonyms_nltk(lowered, pos)
            if synonyms and synonyms[0] != lowered:
                # WordNet spells multi-word lemmas with underscores; show them
                # as ordinary words in the output text.
                word = synonyms[0].replace('_', ' ')
        # Keep the original spacing instead of joining every token with ' '.
        pieces.append(word + token.whitespace_)
    return capitalize_sentences_and_nouns(''.join(pieces))
# Grammar correction through the shared GingerIt parser
def correct_grammar(text):
    """Return the ``'result'`` entry of ``parser.parse(text)``.

    Args:
        text: The text to send to the module-level GingerIt ``parser``.

    Returns:
        The value stored under ``'result'`` in the parser's response.
    """
    return parser.parse(text)['result']
# Combined function: Paraphrase -> Capitalization -> Grammar Correction
def paraphrase_and_correct(text):
    """Paraphrase ``text``, fix its capitalization, then correct its grammar.

    Args:
        text: The text entered by the user. ``None``, an empty string or a
            whitespace-only string is treated as "nothing to do".

    Returns:
        The processed text, or ``""`` when there was no input to process.
    """
    # Nothing to paraphrase: skip the NLP passes and the grammar-service call.
    if not text or not text.strip():
        return ""

    # Step 1: Paraphrase the text
    paraphrased_text = paraphrase_with_spacy_nltk(text)

    # Step 2: Capitalize sentences and proper nouns
    # NOTE(review): paraphrase_with_spacy_nltk already capitalizes its result,
    # so this pass is likely redundant -- confirm before removing it.
    capitalized_text = capitalize_sentences_and_nouns(paraphrased_text)

    # Step 3: Correct grammar
    return correct_grammar(capitalized_text)
# Gradio interface definition
with gr.Blocks() as interface:
with gr.Row():
with gr.Column():
text_input = gr.Textbox(lines=5, label="Input Text")
detect_button = gr.Button("AI Detection")
paraphrase_button = gr.Button("Paraphrase & Correct")
with gr.Column():
output_text = gr.Textbox(label="Output")
detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_t
|