sashtech's picture
Update app.py
13a208e verified
raw
history blame
6.02 kB
import gradio as gr
import torch
import spacy
import subprocess
import nltk
from nltk.corpus import wordnet
from gensim import downloader as api
from gramformer import Gramformer
# Fetch the NLTK corpora used by the WordNet synonym lookup (no-op when cached)
for _nltk_resource in ("wordnet", "omw-1.4"):
    nltk.download(_nltk_resource)
# Ensure the spaCy model is installed: try to load it, and on failure download
# it once and load again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # OSError from spacy.load is treated here as "model package not installed".
    import sys

    # Use the interpreter that is running this app (a bare "python" may point
    # at a different environment or not exist at all), and fail loudly if the
    # download itself fails instead of hiding it behind a second load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Load the 50-dimensional "glove-wiki-gigaword-50" vectors via Gensim's downloader
word_vectors = api.load("glove-wiki-gigaword-50")

# Use the GPU when PyTorch can see one, otherwise stay on the CPU
_cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_available else "cpu")

# Gramformer instance consumed by correct_grammar().
# NOTE(review): Gramformer's README appears to document models=1 as the
# corrector and models=2 as the detector — confirm that 2 is intended here.
gf = Gramformer(models=2, use_gpu=_cuda_available)

# Tokenizer and sequence classifier consumed by detect_ai_generated().
# NOTE(review): the checkpoint name refers to SST-2 (a sentiment dataset), so
# this is presumably a sentiment classifier rather than an AI-text detector —
# confirm the intended model.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

_AI_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer_ai = AutoTokenizer.from_pretrained(_AI_CHECKPOINT)
model_ai = AutoModelForSequenceClassification.from_pretrained(_AI_CHECKPOINT).to(device)
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Classify *text* with the module-level model and format the score.

    The text is tokenized (truncated to 512 tokens), passed through
    ``model_ai`` without gradient tracking, and the softmax probability at
    class index 1 is reported as a percentage.

    NOTE(review): the checkpoint loaded at module level is named after SST-2,
    so index 1 is presumably the positive-sentiment class rather than an
    "AI-generated" class — confirm before trusting this number.

    Args:
        text: The input string to score.

    Returns:
        A string of the form ``"AI-Generated Content Probability: NN.NN%"``.
    """
    encoded = tokenizer_ai(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model_ai(**encoded).logits

    class_probs = torch.softmax(logits, dim=1)
    ai_probability = class_probs[0][1].item()  # first (only) row, class index 1
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for *word*.

    Args:
        word: The word to look up.
        pos: WordNet part-of-speech constant forwarded to ``wordnet.synsets``.

    Returns:
        A list with the lemma names of the first matching synset, or an empty
        list when WordNet has no synset for the word/POS pair.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    # Only the first synset returned by WordNet is considered.
    return [entry.name() for entry in matches[0].lemmas()]
# Function to check and correct tenses and verbs using spaCy
def check_tense_and_correct(text):
    """Replace third-person-singular present verbs with their base form.

    Every token tagged as a verb with the fine-grained tag ``VBZ`` is replaced
    by its lemma (e.g. "goes" -> "go"); all other tokens, including verbs with
    any other tag, are kept verbatim.

    Tokens are re-assembled with the whitespace that followed them in the
    input, so punctuation and contractions are not pulled apart (joining
    tokens with single spaces would yield "Hello , world ." or "do n't").

    NOTE(review): rewriting VBZ verbs to the lemma removes subject-verb
    agreement ("He goes" -> "He go"); this mirrors the existing rule — confirm
    that it is really the intended "correction".

    Args:
        text: The input string.

    Returns:
        The rewritten string with the input's spacing preserved.
    """
    doc = nlp(text)
    pieces = []
    for token in doc:
        if token.pos_ == "VERB" and token.tag_ == "VBZ":
            word = token.lemma_
        else:
            word = token.text
        # whitespace_ is the trailing whitespace of the token in the input.
        pieces.append(word + token.whitespace_)
    return "".join(pieces)
# Function to capitalize the first letter of sentences and proper nouns
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    Only the first character of a qualifying token is changed; the remainder
    is left untouched so that acronyms and mixed-case names survive
    (``str.capitalize`` would turn "NASA" into "Nasa" and "iPhone" into
    "Iphone"). Tokens are re-joined with the whitespace that followed them in
    the input, so punctuation stays attached to the preceding word.

    Args:
        text: The input string.

    Returns:
        The string with sentence starts and tokens tagged ``PROPN``
        capitalized and the input's spacing preserved.
    """
    doc = nlp(text)
    pieces = []
    for sent in doc.sents:
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Slicing keeps this safe for empty strings and leaves the
                # tail of the word exactly as written.
                word = word[:1].upper() + word[1:]
            pieces.append(word + token.whitespace_)
    return "".join(pieces)
# Paraphrasing function using spaCy and NLTK
def paraphrase_with_spacy_nltk(text):
    """Paraphrase *text* by swapping content words for a WordNet synonym.

    Nouns, verbs, adjectives and adverbs are looked up (lower-cased) with
    ``get_synonyms_nltk``; when the first synonym differs from the word itself
    it replaces the token. All other tokens are kept verbatim. The result is
    passed through ``capitalize_sentences_and_nouns`` before being returned.

    Compared with a plain ``' '.join`` of tokens, the input's own spacing is
    preserved (no detached punctuation), and multi-word WordNet lemmas, which
    are spelled with underscores (e.g. "take_in"), are inserted with spaces.

    Args:
        text: The input string.

    Returns:
        The paraphrased, re-capitalized string.
    """
    # Map spaCy coarse POS tags to WordNet POS constants; anything not listed
    # here is never replaced.
    wordnet_pos_by_tag = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }

    doc = nlp(text)
    pieces = []
    for token in doc:
        word = token.text
        wn_pos = wordnet_pos_by_tag.get(token.pos_)
        if wn_pos is not None:
            lowered = word.lower()
            synonyms = get_synonyms_nltk(lowered, wn_pos)
            # Replace only when the top synonym is actually a different word.
            if synonyms and synonyms[0] != lowered:
                word = synonyms[0].replace("_", " ")
        pieces.append(word + token.whitespace_)

    # Capitalize sentences and proper nouns
    return capitalize_sentences_and_nouns("".join(pieces))
# Function to correct grammar using Gramformer
def correct_grammar(text):
    """Return Gramformer's first correction for *text*, or *text* itself.

    ``gf.correct`` is not indexed directly: the first candidate is taken by
    iteration so that a non-indexable collection works as well as a list.

    NOTE(review): current Gramformer releases appear to return a ``set`` from
    ``correct`` (on which ``[0]`` raises ``TypeError``) and ``None`` when no
    model is loaded — confirm against the installed version.

    Args:
        text: The sentence to correct.

    Returns:
        The first candidate produced by ``gf.correct``, or the unchanged input
        when no candidate is available.
    """
    candidates = gf.correct(text)
    if not candidates:  # None or an empty collection
        return text

    best = next(iter(candidates))
    # NOTE(review): some releases seem to yield (sentence, score) pairs; unwrap
    # the sentence so callers always receive plain text — confirm.
    if isinstance(best, (tuple, list)):
        best = best[0]
    return best
# Combined function: Paraphrase -> Tense Check -> Capitalization -> Grammar Correction
def paraphrase_and_correct(text):
    """Run *text* through the full rewrite pipeline and return the result.

    The stages are applied in order, each receiving the previous stage's
    output: paraphrase, tense check, capitalization, grammar correction.

    Args:
        text: The input string.

    Returns:
        The output of the final (grammar-correction) stage.
    """
    pipeline = (
        paraphrase_with_spacy_nltk,      # Step 1: synonym-based paraphrase
        check_tense_and_correct,         # Step 2: verb/tense pass
        capitalize_sentences_and_nouns,  # Step 3: capitalization
        correct_grammar,                 # Step 4: Gramformer correction
    )
    result = text
    for stage in pipeline:
        result = stage(result)
    return result
# Gradio interface definition: one row with an input column (text box plus the
# two action buttons) and an output column (result text box).
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=5, label="Input Text")
            detect_button = gr.Button("AI Detection")
            paraphrase_button = gr.Button("Paraphrase & Correct")
        with gr.Column():
            output_text = gr.Textbox(label="Output")

    # Both buttons read the same input box and write to the same output box.
    detect_button.click(
        fn=detect_ai_generated,
        inputs=text_input,
        outputs=output_text,
    )
    paraphrase_button.click(
        fn=paraphrase_and_correct,
        inputs=text_input,
        outputs=output_text,
    )

# Launch the Gradio app
interface.launch(debug=False)