sashtech committed on
Commit
3c39506
·
verified ·
1 Parent(s): 17f790c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -29
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
3
  import torch
4
  import spacy
5
  import subprocess
@@ -25,18 +25,14 @@ word_vectors = api.load("glove-wiki-gigaword-50")
25
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
 
27
  # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
28
- tokenizer_ai = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
29
- model_ai = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
30
-
31
- # Load the grammar correction model
32
- tokenizer_gc = AutoTokenizer.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis")
33
- model_gc = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/flan-t5-large-grammar-synthesis").to(device)
34
 
35
  # AI detection function using DistilBERT
36
  def detect_ai_generated(text):
37
- inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
38
  with torch.no_grad():
39
- outputs = model_ai(**inputs)
40
  probabilities = torch.softmax(outputs.logits, dim=1)
41
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
42
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
@@ -49,7 +45,25 @@ def get_synonyms_nltk(word, pos):
49
  return [lemma.name() for lemma in lemmas]
50
  return []
51
 
52
- # Paraphrasing function using spaCy and NLTK (without grammar correction)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def paraphrase_with_spacy_nltk(text):
54
  doc = nlp(text)
55
  paraphrased_words = []
@@ -77,23 +91,8 @@ def paraphrase_with_spacy_nltk(text):
77
  # Join the words back into a sentence
78
  paraphrased_sentence = ' '.join(paraphrased_words)
79
 
80
- return paraphrased_sentence
81
-
82
- # Grammar correction function using the T5 model
83
- def correct_grammar(text):
84
- inputs = tokenizer_gc(text, return_tensors="pt", truncation=True, max_length=512).to(device)
85
- with torch.no_grad():
86
- outputs = model_gc.generate(inputs['input_ids'], max_length=512, num_beams=5, early_stopping=True)
87
- corrected_text = tokenizer_gc.decode(outputs[0], skip_special_tokens=True)
88
- return corrected_text
89
-
90
- # Combined function: Paraphrase -> Grammar Check
91
- def paraphrase_and_correct(text):
92
- # Step 1: Paraphrase the text
93
- paraphrased_text = paraphrase_with_spacy_nltk(text)
94
-
95
- # Step 2: Apply grammar correction
96
- corrected_text = correct_grammar(paraphrased_text)
97
 
98
  return corrected_text
99
 
@@ -103,12 +102,12 @@ with gr.Blocks() as interface:
103
  with gr.Column():
104
  text_input = gr.Textbox(lines=5, label="Input Text")
105
  detect_button = gr.Button("AI Detection")
106
- paraphrase_button = gr.Button("Paraphrase & Correct Grammar")
107
  with gr.Column():
108
  output_text = gr.Textbox(label="Output")
109
 
110
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
111
- paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
112
 
113
  # Launch the Gradio app
114
  interface.launch(debug=False)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import spacy
5
  import subprocess
 
25
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
 
27
  # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
28
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
29
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
 
 
 
 
30
 
31
  # AI detection function using DistilBERT
32
  def detect_ai_generated(text):
33
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
34
  with torch.no_grad():
35
+ outputs = model(**inputs)
36
  probabilities = torch.softmax(outputs.logits, dim=1)
37
  ai_probability = probabilities[0][1].item() # Probability of being AI-generated
38
  return f"AI-Generated Content Probability: {ai_probability:.2f}%"
 
45
  return [lemma.name() for lemma in lemmas]
46
  return []
47
 
48
+ # Function to capitalize the first letter of sentences and proper nouns
49
def capitalize_sentences_and_nouns(text):
    """Upper-case the first letter of each sentence and of each proper noun.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Tokens
    that open a sentence, or that are tagged ``PROPN``, get their first
    character upper-cased; every other character is left untouched. Tokens
    are re-joined with single spaces, as are the sentences.

    Args:
        text: The input string to fix up.

    Returns:
        The space-joined text with the capitalization applied.
    """
    doc = nlp(text)
    corrected_text = []

    for sent in doc.sents:
        sentence = []
        for token in sent:
            word = token.text
            if token.i == sent.start or token.pos_ == "PROPN":
                # Only touch the first character: str.capitalize() would also
                # lower-case the rest and mangle words such as "NASA" or
                # "McDonald".
                word = word[:1].upper() + word[1:]
            sentence.append(word)
        corrected_text.append(' '.join(sentence))

    return ' '.join(corrected_text)
65
+
66
+ # Paraphrasing function using spaCy and NLTK
67
  def paraphrase_with_spacy_nltk(text):
68
  doc = nlp(text)
69
  paraphrased_words = []
 
91
  # Join the words back into a sentence
92
  paraphrased_sentence = ' '.join(paraphrased_words)
93
 
94
+ # Capitalize sentences and proper nouns
95
+ corrected_text = capitalize_sentences_and_nouns(paraphrased_sentence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  return corrected_text
98
 
 
102
  with gr.Column():
103
  text_input = gr.Textbox(lines=5, label="Input Text")
104
  detect_button = gr.Button("AI Detection")
105
+ paraphrase_button = gr.Button("Paraphrase & Correct")
106
  with gr.Column():
107
  output_text = gr.Textbox(label="Output")
108
 
109
  detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
110
+ paraphrase_button.click(paraphrase_with_spacy_nltk, inputs=text_input, outputs=output_text)
111
 
112
  # Launch the Gradio app
113
  interface.launch(debug=False)