huamnifierWithSimpleGrammer

Running

App Files Files

huamnifierWithSimpleGrammer / app.py

sashtech

Update app.py

17f790c verified about 1 year ago

raw

history blame

4.36 kB

	import subprocess
	import sys

	import gradio as gr
	import nltk
	import spacy
	import torch
	from gensim import downloader as api
	from nltk.corpus import wordnet
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM

	# Ensure necessary NLTK data is downloaded
	nltk.download('wordnet')
	nltk.download('omw-1.4')

	# Ensure the spaCy model is installed. If loading raises OSError, download the
	# model with the interpreter that is running this script (sys.executable)
	# rather than whichever "python" happens to be first on PATH, so the package
	# lands in the active environment. check=True makes a failed download raise
	# immediately instead of surfacing later as a second load error.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_sm")

	# Load small pre-trained GloVe word vectors through Gensim's downloader.
	# NOTE(review): word_vectors is not referenced anywhere else in the visible
	# code - confirm whether it is still needed.
	word_vectors = api.load("glove-wiki-gigaword-50")

	# Check for GPU and set the device accordingly
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Load the classifier and tokenizer used by detect_ai_generated.
	# NOTE(review): the checkpoint name indicates an SST-2 sentiment fine-tune,
	# not a dedicated AI-text detector - confirm this is the intended model.
	tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
	model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

	# Load the grammar correction model and its tokenizer
	tokenizer_gc = AutoTokenizer.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis")
	model_gc = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis").to(device)

	# AI detection function using DistilBERT
	def detect_ai_generated(text):
	    """Classify *text* and report the class-1 probability as a percentage.

	    The text is tokenized (truncated to 512 tokens), run through ``model_ai``
	    with gradient tracking disabled, and the softmax probability at index 1
	    of the first row is formatted into the returned string.

	    NOTE(review): index 1 is treated as the "AI-generated" class; verify
	    this against the label mapping of the loaded checkpoint.

	    Args:
	        text: The input string to score.

	    Returns:
	        A string of the form "AI-Generated Content Probability: 87.34%".
	    """
	    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
	    with torch.no_grad():
	        outputs = model_ai(**inputs)
	    probabilities = torch.softmax(outputs.logits, dim=1)
	    ai_probability = probabilities[0][1].item()
	    # Softmax yields a value in [0, 1]; scale to 0-100 so the "%" suffix is
	    # accurate (previously 0.87 was displayed as "0.87%").
	    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"

	# Synonym lookup backed by NLTK WordNet
	def get_synonyms_nltk(word, pos):
	    """Return the lemma names of the first WordNet synset for *word*.

	    Args:
	        word: The word to look up.
	        pos: WordNet part-of-speech constant passed to ``wordnet.synsets``.

	    Returns:
	        A list of lemma names from the first matching synset, or an empty
	        list when WordNet returns no synsets.
	    """
	    matches = wordnet.synsets(word, pos=pos)
	    if not matches:
	        return []
	    first_synset = matches[0]
	    return [entry.name() for entry in first_synset.lemmas()]

	# Paraphrasing function using spaCy and NLTK (without grammar correction)
	def paraphrase_with_spacy_nltk(text):
	    """Return *text* with content words swapped for their first WordNet synonym.

	    Each spaCy token tagged NOUN, VERB, ADJ or ADV is looked up through
	    ``get_synonyms_nltk``; when the first synonym differs from the lowercased
	    token text, it replaces the token. All other tokens are kept verbatim.

	    The output is rebuilt with each token's own trailing whitespace, so
	    punctuation stays attached to the preceding word and the original
	    spacing is preserved (joining tokens with single spaces produced
	    "word , word ."). Multi-word lemma names have their underscores turned
	    into spaces, and a token that started with a capital letter keeps a
	    leading capital after replacement.

	    Args:
	        text: The input string to paraphrase.

	    Returns:
	        The paraphrased string.
	    """
	    # Map spaCy coarse POS tags to WordNet POS tags; tokens with any other
	    # tag are never replaced.
	    pos_map = {
	        "NOUN": wordnet.NOUN,
	        "VERB": wordnet.VERB,
	        "ADJ": wordnet.ADJ,
	        "ADV": wordnet.ADV,
	    }

	    pieces = []
	    for token in nlp(text):
	        replacement = token.text
	        pos = pos_map.get(token.pos_)
	        if pos:
	            lowered = token.text.lower()
	            synonyms = get_synonyms_nltk(lowered, pos)
	            # Replace only when the first synonym is actually a different word
	            if synonyms and synonyms[0] != lowered:
	                # Lemma names join multi-word expressions with underscores
	                replacement = synonyms[0].replace("_", " ")
	                # Keep the leading capital of the word being replaced
	                if token.text[:1].isupper():
	                    replacement = replacement[:1].upper() + replacement[1:]
	        # whitespace_ is the token's original trailing whitespace ("" or " ")
	        pieces.append(replacement + token.whitespace_)

	    return "".join(pieces)

	# Grammar correction function using the T5 model
	def correct_grammar(text):
	    """Return a grammar-corrected version of *text* generated by ``model_gc``.

	    The text is tokenized (truncated to 512 tokens) and decoded with beam
	    search (5 beams, at most 512 tokens); the first returned sequence is
	    decoded with special tokens stripped.

	    Args:
	        text: The input string to correct.

	    Returns:
	        The corrected string, or an empty string when *text* is empty or
	        contains only whitespace.
	    """
	    # Nothing to correct: skip the model call rather than let it generate
	    # output from an empty prompt.
	    if not text or not text.strip():
	        return ""

	    inputs = tokenizer_gc(text, return_tensors="pt", truncation=True, max_length=512).to(device)
	    with torch.no_grad():
	        # Unpack the whole encoding so the attention mask produced by the
	        # tokenizer accompanies input_ids.
	        outputs = model_gc.generate(**inputs, max_length=512, num_beams=5, early_stopping=True)
	    return tokenizer_gc.decode(outputs[0], skip_special_tokens=True)

	# Full pipeline: synonym paraphrase first, grammar correction second
	def paraphrase_and_correct(text):
	    """Paraphrase *text* and then run grammar correction on the result.

	    Args:
	        text: The input string.

	    Returns:
	        The output of ``correct_grammar`` applied to the output of
	        ``paraphrase_with_spacy_nltk``.
	    """
	    return correct_grammar(paraphrase_with_spacy_nltk(text))

	# Gradio UI: one row with an input column (text box + two action buttons)
	# and an output column (result text box).
	interface = gr.Blocks()
	with interface:
	    with gr.Row():
	        # First column: user input and the two actions
	        with gr.Column():
	            text_input = gr.Textbox(lines=5, label="Input Text")
	            detect_button = gr.Button("AI Detection")
	            paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
	        # Second column: shared output box written to by both buttons
	        with gr.Column():
	            output_text = gr.Textbox(label="Output")

	    # Event wiring is registered inside the Blocks context
	    detect_button.click(fn=detect_ai_generated, inputs=text_input, outputs=output_text)
	    paraphrase_button.click(fn=paraphrase_and_correct, inputs=text_input, outputs=output_text)

	# Start the Gradio server
	interface.launch(debug=False)