huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 2, 2024

Commit

5065a5b

verified ·

1 Parent(s): e675ad6

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -1

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
-import language_check  # Use language-check instead of language-tool-python
 from gensim import downloader as api
 # Ensure necessary NLTK data is downloaded
@@ -28,3 +27,66 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)

 import subprocess
 import nltk
 from nltk.corpus import wordnet
 from gensim import downloader as api
 # Ensure necessary NLTK data is downloaded
 # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
+# AI detection function using DistilBERT
+def detect_ai_generated(text):
+    """Score *text* with the loaded DistilBERT classifier and format the result.
+
+    The text is tokenized (truncated to 512 tokens), run through ``model``
+    without gradient tracking, and the softmax probability of class index 1
+    is reported as a percentage.
+
+    NOTE(review): the checkpoint loaded above is
+    ``distilbert-base-uncased-finetuned-sst-2-english``, which by its name is
+    an SST-2 sentiment model; index 1 is presumably its POSITIVE label, not an
+    "AI-generated" class. Swap in a real detector checkpoint -- TODO confirm.
+
+    Args:
+        text: Raw input string from the UI.
+
+    Returns:
+        A display string such as ``"AI-Generated Content Probability: 87.31%"``,
+        or a short prompt asking for input when *text* is empty or whitespace.
+    """
+    # Nothing meaningful can be scored for empty input, so say so instead of
+    # returning a probability for an empty sequence.
+    if not text or not text.strip():
+        return "Please enter some text to analyze."
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probabilities = torch.softmax(outputs.logits, dim=1)
+    ai_probability = probabilities[0][1].item()
+    # softmax yields a 0-1 fraction; the "%" format spec scales it by 100 so
+    # the number shown actually matches the percent sign.
+    return f"AI-Generated Content Probability: {ai_probability:.2%}"
+# Function to get synonyms using NLTK WordNet
+def get_synonyms_nltk(word, pos):
+    """Return the lemma names of the first WordNet synset found for *word*.
+
+    Args:
+        word: The word to look up.
+        pos: WordNet part-of-speech constant passed through to the lookup.
+
+    Returns:
+        A list of lemma-name strings from the first synset, or an empty list
+        when WordNet has no synset for the word/POS pair.
+    """
+    matches = wordnet.synsets(word, pos=pos)
+    if not matches:
+        return []
+    # Only the first synset is consulted; later synsets are ignored.
+    return [entry.name() for entry in matches[0].lemmas()]
+# Paraphrasing function using spaCy and NLTK (without grammar correction)
+def _match_casing(replacement, original):
+    """Return *replacement* cased like *original* (all-caps or leading capital)."""
+    if len(original) > 1 and original.isupper():
+        return replacement.upper()
+    if original[:1].isupper():
+        return replacement[:1].upper() + replacement[1:]
+    return replacement
+
+
+def paraphrase_with_spacy_nltk(text):
+    """Paraphrase *text* by swapping content words for a WordNet synonym.
+
+    Each token tagged NOUN, VERB, ADJ or ADV is looked up (lower-cased) via
+    ``get_synonyms_nltk``; when the first returned lemma differs from the
+    word, it replaces the token. All other tokens are kept verbatim. The
+    original spacing between tokens is preserved, so punctuation stays
+    attached to the preceding word.
+
+    Args:
+        text: The input string to paraphrase.
+
+    Returns:
+        The paraphrased string; an empty string when *text* is empty.
+    """
+    if not text:
+        return ""
+    # spaCy coarse POS tag -> WordNet POS constant; tags not listed here are
+    # never replaced.
+    pos_map = {
+        "NOUN": wordnet.NOUN,
+        "VERB": wordnet.VERB,
+        "ADJ": wordnet.ADJ,
+        "ADV": wordnet.ADV,
+    }
+    pieces = []
+    for token in nlp(text):
+        replacement = token.text
+        wn_pos = pos_map.get(token.pos_)
+        if wn_pos is not None:
+            lowered = token.text.lower()
+            synonyms = get_synonyms_nltk(lowered, wn_pos)
+            # Only the first lemma is considered; if it is the word itself the
+            # token is left unchanged.
+            if synonyms and synonyms[0] != lowered:
+                # WordNet writes multi-word lemmas with underscores
+                # ("ice_cream"); turn them back into spaces for display and
+                # carry over the capitalisation of the word being replaced.
+                replacement = _match_casing(synonyms[0].replace("_", " "), token.text)
+        # token.whitespace_ is the trailing space (if any) from the input, so
+        # re-joining with "" reproduces the original spacing instead of
+        # inserting a space before every punctuation mark.
+        pieces.append(replacement + token.whitespace_)
+    return "".join(pieces)
+# Gradio interface definition
+# One row with two columns: the first column holds the input box and both
+# action buttons, the second column holds a single output box.
+with gr.Blocks() as interface:
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(lines=5, label="Input Text")
+            detect_button = gr.Button("AI Detection")
+            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK")
+        with gr.Column():
+            output_text = gr.Textbox(label="Output")
+    # Both buttons read the same input and write to the same output box, so
+    # each click overwrites whatever the other action last displayed.
+    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
+    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
+# Launch the Gradio app
+# Called at module level (no __main__ guard), with debug explicitly disabled.
+interface.launch(debug=False)