huamnifierWithSimpleGrammer

Running

App Files Files

huamnifierWithSimpleGrammer / app.py

sashtech

Update app.py

3e83484 verified 10 months ago

raw

history blame

4.03 kB

	import os
	import subprocess
	import sys

	import gradio as gr
	import nltk
	import spacy
	from nltk.corpus import wordnet
	from transformers import pipeline

	# Text-classification pipeline backing the "AI Detection" tab; it is built
	# once at import time and reused by every call to predict_en().
	pipeline_en = pipeline(
	    task="text-classification",
	    model="Hello-SimpleAI/chatgpt-detector-roberta",
	)

	def predict_en(text):
	    """Classify English *text* with the module-level ``pipeline_en``.

	    Returns:
	        A ``(label, score)`` tuple read from the first result the pipeline
	        produces for *text*.
	    """
	    top_result = pipeline_en(text)[0]
	    label = top_result['label']
	    score = top_result['score']
	    return label, score

	# Make sure the WordNet corpora used by the Humanifier are available locally.
	nltk.download('wordnet')
	nltk.download('omw-1.4')

	# Load the small English spaCy model, installing it on first use if missing.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    # Run the download with the interpreter executing this script (not
	    # whatever "python" happens to be on PATH) so the model lands in this
	    # environment. check=True makes a failed download raise here instead of
	    # surfacing as a second, less informative OSError from spacy.load().
	    subprocess.run(
	        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
	        check=True,
	    )
	    nlp = spacy.load("en_core_web_sm")

	def get_synonym(word, pos_tag):
	    """Return a WordNet synonym for *word* limited to part of speech *pos_tag*.

	    The first lemma of the first synset whose part of speech matches is used.
	    Multi-word lemmas, which WordNet joins with underscores, are returned
	    with spaces instead. The result is lower-cased when *word* is entirely
	    lower case and capitalized otherwise.

	    Args:
	        word: Surface form to look up.
	        pos_tag: WordNet part-of-speech constant (e.g. ``wordnet.NOUN``).

	    Returns:
	        The replacement string, or *word* unchanged when no synset matches.
	    """
	    # WordNet tags "satellite" adjectives separately from plain adjectives,
	    # so an ADJ request must accept both or those synsets are never matched.
	    accepted_pos = {pos_tag}
	    if pos_tag == wordnet.ADJ:
	        accepted_pos.add(wordnet.ADJ_SAT)

	    for synset in wordnet.synsets(word):
	        if synset.pos() not in accepted_pos:
	            continue
	        # Lemma names use "_" between the words of a multi-word expression.
	        synonym = synset.lemmas()[0].name().replace('_', ' ')
	        return synonym.lower() if word.islower() else synonym.capitalize()
	    return word

	def rephrase_with_synonyms(text):
	    """Replace nouns, verbs, adjectives and adverbs in *text* with synonyms.

	    The text is parsed with the module-level spaCy pipeline ``nlp``; each
	    token with one of the four supported parts of speech is passed to
	    ``get_synonym``. Tokens of any other kind are copied through unchanged.

	    Form-preservation rules:
	      * Past-tense verbs are left exactly as written. A lemma is a base
	        form, so substituting one would drop the tense.
	      * Plural nouns (tag ``NNS``) whose "synonym" is just the same word or
	        its singular lemma keep their original spelling, so irregular
	        plurals are not mangled. Any other replacement gets a trailing
	        ``s`` unless it already ends in one.

	    The whitespace that followed each token in the input is re-attached, so
	    punctuation spacing and line breaks are preserved.

	    Args:
	        text: Input string to rephrase.

	    Returns:
	        The rephrased string.
	    """
	    wordnet_pos_by_spacy_pos = {
	        "NOUN": wordnet.NOUN,
	        "VERB": wordnet.VERB,
	        "ADJ": wordnet.ADJ,
	        "ADV": wordnet.ADV,
	    }

	    pieces = []
	    for token in nlp(text):
	        replacement = token.text
	        pos_tag = wordnet_pos_by_spacy_pos.get(token.pos_)

	        if pos_tag:
	            replacement = get_synonym(token.text, pos_tag)

	            if token.pos_ == "VERB":
	                # morph.get() returns a list of values, so test membership;
	                # comparing the list to the string "Past" is never true.
	                if "Past" in token.morph.get("Tense"):
	                    replacement = token.text
	            elif token.pos_ == "NOUN" and token.tag_ == "NNS":
	                unchanged_forms = (token.text.lower(), token.lemma_.lower())
	                if replacement.lower() in unchanged_forms:
	                    # Nothing was really substituted: keep the original
	                    # (possibly irregular) plural rather than re-inflecting.
	                    replacement = token.text
	                elif not replacement.endswith('s'):
	                    replacement += 's'

	        # Re-attach the token's own trailing whitespace instead of forcing a
	        # single space between every pair of tokens.
	        pieces.append(replacement + token.whitespace_)

	    return ''.join(pieces)

	def paraphrase_and_correct(text):
	    """Run *text* through the Humanifier stages and return the result.

	    The stages are applied in this order, each receiving the previous
	    stage's output: ``capitalize_sentences_and_nouns``,
	    ``correct_article_errors``, ``correct_singular_plural_errors``,
	    ``correct_tense_errors`` and finally ``rephrase_with_synonyms``.

	    NOTE(review): the first four helpers are not defined in the visible
	    part of this file -- confirm they exist elsewhere, otherwise calling
	    this function raises ``NameError``.
	    """
	    # Capitalization runs before the grammar passes.
	    capitalized = capitalize_sentences_and_nouns(text)

	    # Grammar passes: articles, then number agreement, then tense.
	    articles_fixed = correct_article_errors(capitalized)
	    number_fixed = correct_singular_plural_errors(articles_fixed)
	    tense_fixed = correct_tense_errors(number_fixed)

	    # Synonym substitution is the last step.
	    return rephrase_with_synonyms(tense_fixed)

	# Two-tab Gradio interface: an AI-text detector and the Humanifier.
	with gr.Blocks() as demo:
	    with gr.Tab("AI Detection"):
	        t1 = gr.Textbox(label='Text', lines=5)
	        button1 = gr.Button("🤖 Predict!")
	        label1 = gr.Textbox(label='Predicted Label 🎃', lines=1)
	        score1 = gr.Textbox(label='Prob', lines=1)

	        # Clicking the button sends the text to predict_en and shows the
	        # returned label and score in the two output boxes.
	        button1.click(
	            predict_en,
	            inputs=[t1],
	            outputs=[label1, score1],
	            api_name='predict_en',
	        )

	    with gr.Tab("Humanifier"):
	        text_input = gr.Textbox(label="Input Text", lines=5)
	        paraphrase_button = gr.Button("Paraphrase & Correct")
	        output_text = gr.Textbox(label="Paraphrased Text")

	        # Clicking the button runs paraphrase_and_correct on the input text.
	        paraphrase_button.click(
	            paraphrase_and_correct,
	            inputs=text_input,
	            outputs=output_text,
	        )

	# Start serving the interface.
	demo.launch()