Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
import torch
|
4 |
import spacy
|
5 |
import subprocess
|
@@ -25,18 +25,14 @@ word_vectors = api.load("glove-wiki-gigaword-50")
|
|
25 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
26 |
|
27 |
# Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
# Load the grammar correction model
|
32 |
-
tokenizer_gc = AutoTokenizer.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis")
|
33 |
-
model_gc = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis").to(device)
|
34 |
|
35 |
# AI detection function using DistilBERT
|
36 |
def detect_ai_generated(text):
|
37 |
-
inputs =
|
38 |
with torch.no_grad():
|
39 |
-
outputs =
|
40 |
probabilities = torch.softmax(outputs.logits, dim=1)
|
41 |
ai_probability = probabilities[0][1].item() # Probability of being AI-generated
|
42 |
return f"AI-Generated Content Probability: {ai_probability:.2f}%"
|
@@ -49,7 +45,25 @@ def get_synonyms_nltk(word, pos):
|
|
49 |
return [lemma.name() for lemma in lemmas]
|
50 |
return []
|
51 |
|
52 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
def paraphrase_with_spacy_nltk(text):
|
54 |
doc = nlp(text)
|
55 |
paraphrased_words = []
|
@@ -77,23 +91,8 @@ def paraphrase_with_spacy_nltk(text):
|
|
77 |
# Join the words back into a sentence
|
78 |
paraphrased_sentence = ' '.join(paraphrased_words)
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
# Grammar correction function using the T5 model
|
83 |
-
def correct_grammar(text):
|
84 |
-
inputs = tokenizer_gc(text, return_tensors="pt", truncation=True, max_length=512).to(device)
|
85 |
-
with torch.no_grad():
|
86 |
-
outputs = model_gc.generate(inputs['input_ids'], max_length=512, num_beams=5, early_stopping=True)
|
87 |
-
corrected_text = tokenizer_gc.decode(outputs[0], skip_special_tokens=True)
|
88 |
-
return corrected_text
|
89 |
-
|
90 |
-
# Combined function: Paraphrase -> Grammar Check
|
91 |
-
def paraphrase_and_correct(text):
|
92 |
-
# Step 1: Paraphrase the text
|
93 |
-
paraphrased_text = paraphrase_with_spacy_nltk(text)
|
94 |
-
|
95 |
-
# Step 2: Apply grammar correction
|
96 |
-
corrected_text = correct_grammar(paraphrased_text)
|
97 |
|
98 |
return corrected_text
|
99 |
|
@@ -103,12 +102,12 @@ with gr.Blocks() as interface:
|
|
103 |
with gr.Column():
|
104 |
text_input = gr.Textbox(lines=5, label="Input Text")
|
105 |
detect_button = gr.Button("AI Detection")
|
106 |
-
paraphrase_button = gr.Button("Paraphrase & Correct
|
107 |
with gr.Column():
|
108 |
output_text = gr.Textbox(label="Output")
|
109 |
|
110 |
detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
|
111 |
-
paraphrase_button.click(
|
112 |
|
113 |
# Launch the Gradio app
|
114 |
interface.launch(debug=False)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
3 |
import torch
|
4 |
import spacy
|
5 |
import subprocess
|
|
|
25 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
26 |
|
27 |
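# Load the DistilBERT classifier and tokenizer used for AI detection from Hugging Face
# NOTE(review): the checkpoint name below says "finetuned-sst-2-english" - confirm it is the intended AI-text detector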
|
28 |
+
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
|
29 |
+
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# AI detection function using DistilBERT
|
32 |
def detect_ai_generated(text):
    """Classify ``text`` and return a formatted probability string.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    512 tokens), run through the module-level ``model`` on ``device`` without
    gradient tracking, and the logits are converted to probabilities with a
    softmax over the class dimension.

    Args:
        text: The input string to score.

    Returns:
        A string of the form ``"AI-Generated Content Probability: 98.00%"``.
    """
    # Tokenize and move the resulting tensors to the same device as the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.softmax(outputs.logits, dim=1)
    # Class index 1 of the first (only) batch row is reported as the score.
    # NOTE(review): confirm index 1 is the "AI-generated" label for the loaded model.
    ai_probability = probabilities[0][1].item()

    # Softmax yields a value in [0, 1]; the "%" presentation type scales it by
    # 100, so 0.98 is shown as "98.00%" rather than the misleading "0.98%".
    return f"AI-Generated Content Probability: {ai_probability:.2%}"
|
|
|
45 |
return [lemma.name() for lemma in lemmas]
|
46 |
return []
|
47 |
|
48 |
+
# Function to capitalize the first letter of sentences and proper nouns
|
49 |
+
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    The text is parsed with the module-level ``nlp`` pipeline. Within every
    sentence, the first token and every token tagged ``PROPN`` get their first
    character upper-cased; the remaining characters are left untouched so that
    acronyms and mixed-case names (e.g. "NASA", "McDonald") are preserved.

    Args:
        text: The input string to fix up.

    Returns:
        The tokens of all sentences joined with single spaces.
    """
    doc = nlp(text)
    corrected_text = []

    for sent in doc.sents:
        sentence = []
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Only touch the first character: str.capitalize() would also
                # lowercase the rest of the word and mangle acronyms/names.
                sentence.append(word[:1].upper() + word[1:])
            else:
                sentence.append(word)
        corrected_text.append(' '.join(sentence))

    return ' '.join(corrected_text)
|
65 |
+
|
66 |
+
# Paraphrasing function using spaCy and NLTK
|
67 |
def paraphrase_with_spacy_nltk(text):
|
68 |
doc = nlp(text)
|
69 |
paraphrased_words = []
|
|
|
91 |
# Join the words back into a sentence
|
92 |
paraphrased_sentence = ' '.join(paraphrased_words)
|
93 |
|
94 |
+
# Capitalize sentences and proper nouns
|
95 |
+
corrected_text = capitalize_sentences_and_nouns(paraphrased_sentence)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
return corrected_text
|
98 |
|
|
|
102 |
with gr.Column():
|
103 |
text_input = gr.Textbox(lines=5, label="Input Text")
|
104 |
detect_button = gr.Button("AI Detection")
|
105 |
+
paraphrase_button = gr.Button("Paraphrase & Correct")
|
106 |
with gr.Column():
|
107 |
output_text = gr.Textbox(label="Output")
|
108 |
|
109 |
detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
|
110 |
+
paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
|
111 |
|
112 |
# Launch the Gradio app
|
113 |
interface.launch(debug=False)
|