Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification,
|
3 |
import torch
|
4 |
import spacy
|
5 |
import subprocess
|
6 |
import nltk
|
7 |
from nltk.corpus import wordnet
|
8 |
from gensim import downloader as api
|
9 |
-
from gingerit.gingerit import GingerIt # Import GingerIt for grammar correction
|
10 |
|
11 |
# Ensure necessary NLTK data is downloaded
|
12 |
nltk.download('wordnet')
|
@@ -26,14 +25,18 @@ word_vectors = api.load("glove-wiki-gigaword-50")
|
|
26 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
27 |
|
28 |
# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# AI detection function using DistilBERT
|
33 |
def detect_ai_generated(text):
|
34 |
-
inputs =
|
35 |
with torch.no_grad():
|
36 |
-
outputs =
|
37 |
probabilities = torch.softmax(outputs.logits, dim=1)
|
38 |
ai_probability = probabilities[0][1].item() # Probability of being AI-generated
|
39 |
return f"AI-Generated Content Probability: {ai_probability:.2f}%"
|
@@ -76,11 +79,13 @@ def paraphrase_with_spacy_nltk(text):
|
|
76 |
|
77 |
return paraphrased_sentence
|
78 |
|
79 |
-
# Grammar correction function using
|
80 |
def correct_grammar(text):
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
84 |
|
85 |
# Combined function: Paraphrase -> Grammar Check
|
86 |
def paraphrase_and_correct(text):
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
|
3 |
import torch
|
4 |
import spacy
|
5 |
import subprocess
|
6 |
import nltk
|
7 |
from nltk.corpus import wordnet
|
8 |
from gensim import downloader as api
|
|
|
9 |
|
10 |
# Ensure necessary NLTK data is downloaded
|
11 |
nltk.download('wordnet')
|
|
|
25 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
26 |
|
27 |
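# Load AI Detector model and tokenizer from Hugging Face (DistilBERT).
# NOTE(review): the checkpoint name used below ("distilbert-base-uncased-finetuned-sst-2-english")
# indicates an SST-2 sentiment classifier rather than an AI-text detector — confirm this is the intended model.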
|
28 |
+
tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
|
29 |
+
model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
|
30 |
+
|
31 |
+
# Load the grammar correction model
|
32 |
+
tokenizer_gc = AutoTokenizer.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis")
|
33 |
+
model_gc = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis").to(device)
|
34 |
|
35 |
# AI detection function using DistilBERT
|
36 |
def detect_ai_generated(text):
    """Classify ``text`` with the module-level model and report the class-1 score.

    The text is tokenized with ``tokenizer_ai`` (truncated to 512 tokens),
    run through ``model_ai`` without gradient tracking, and the logits are
    turned into probabilities with a softmax over the class dimension.

    Args:
        text: The input string to score.

    Returns:
        A display string of the form
        ``"AI-Generated Content Probability: 87.12%"``.

    NOTE(review): class index 1 is treated as "AI-generated". Whether that is
    what index 1 means depends on the checkpoint behind ``model_ai`` — confirm
    against that model's label mapping.
    """
    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # Softmax yields a value in [0, 1]; index 1 of the first (only) row is the score reported.
    ai_probability = probabilities[0][1].item()
    # The ``%`` presentation type multiplies by 100 before appending the sign,
    # so 0.8712 renders as "87.12%" instead of the misleading "0.87%".
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
|
|
|
79 |
|
80 |
return paraphrased_sentence
|
81 |
|
82 |
+
# Grammar correction function using the T5 model
|
83 |
def correct_grammar(text):
    """Return a grammar-corrected rewrite of ``text`` from the seq2seq model.

    The input is tokenized with ``tokenizer_gc`` and truncated to 512 tokens,
    so anything past that limit is not seen by the model. Generation uses
    beam search (5 beams, early stopping) capped at 512 output tokens, and
    the first returned sequence is decoded with special tokens stripped.

    Args:
        text: The string to correct.

    Returns:
        The decoded model output as a plain string.
    """
    encoded = tokenizer_gc(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    generation_options = {
        "max_length": 512,
        "num_beams": 5,
        "early_stopping": True,
    }

    # Inference only: skip autograd bookkeeping while generating.
    with torch.no_grad():
        generated = model_gc.generate(encoded["input_ids"], **generation_options)

    return tokenizer_gc.decode(generated[0], skip_special_tokens=True)
|
89 |
|
90 |
# Combined function: Paraphrase -> Grammar Check
|
91 |
def paraphrase_and_correct(text):
|