huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 2, 2024

Commit

6cae122

verified ·

1 Parent(s): cdd9b28

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -74

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
-import language_tool_python  # Import language-tool-python for grammar correction
 from gensim import downloader as api
 # Ensure necessary NLTK data is downloaded
@@ -28,76 +28,3 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-# Function to correct grammar using language-tool-python
-def correct_grammar_with_language_tool(text):
-    tool = language_tool_python.LanguageTool('en-US')
-    matches = tool.check(text)
-    corrected_text = language_tool_python.utils.correct(text, matches)
-    return corrected_text
-# AI detection function using DistilBERT
-def detect_ai_generated(text):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probabilities = torch.softmax(outputs.logits, dim=1)
-    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
-    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
-# Function to get synonyms using NLTK WordNet
-def get_synonyms_nltk(word, pos):
-    synsets = wordnet.synsets(word, pos=pos)
-    if synsets:
-        lemmas = synsets[0].lemmas()
-        return [lemma.name() for lemma in lemmas]
-    return []
-# Paraphrasing function using spaCy and NLTK with grammar correction
-def paraphrase_with_spacy_nltk(text):
-    doc = nlp(text)
-    paraphrased_words = []
-    for token in doc:
-        # Map spaCy POS tags to WordNet POS tags
-        pos = None
-        if token.pos_ in {"NOUN"}:
-            pos = wordnet.NOUN
-        elif token.pos_ in {"VERB"}:
-            pos = wordnet.VERB
-        elif token.pos_ in {"ADJ"}:
-            pos = wordnet.ADJ
-        elif token.pos_ in {"ADV"}:
-            pos = wordnet.ADV
-        synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
-        # Replace with a synonym only if it makes sense
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
-            paraphrased_words.append(synonyms[0])
-        else:
-            paraphrased_words.append(token.text)
-    # Join the words back into a sentence
-    paraphrased_sentence = ' '.join(paraphrased_words)
-    # Correct the grammar of the paraphrased sentence using language-tool-python
-    corrected_sentence = correct_grammar_with_language_tool(paraphrased_sentence)
-    return corrected_sentence
-# Gradio interface definition
-with gr.Blocks() as interface:
-    with gr.Row():
-        with gr.Column():
-            text_input = gr.Textbox(lines=5, label="Input Text")
-            detect_button = gr.Button("AI Detection")
-            paraphrase_button = gr.Button("Paraphrase with spaCy & NLTK (Grammar Corrected)")
-        with gr.Column():
-            output_text = gr.Textbox(label="Output")
-    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
-    paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
-# Launch the Gradio app
-interface.launch(debug=False)

 import subprocess
 import nltk
 from nltk.corpus import wordnet
+import language_check  # Use language-check instead of language-tool-python
 from gensim import downloader as api
 # Ensure necessary NLTK data is downloaded
 # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)