sashtech committed on
Commit
17f790c
·
verified ·
1 Parent(s): 7e4465c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -1,12 +1,11 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  import torch
4
  import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
9
- from gingerit.gingerit import GingerIt # Import GingerIt for grammar correction
10
 
11
  # Ensure necessary NLTK data is downloaded
12
  nltk.download('wordnet')
@@ -26,14 +25,18 @@ word_vectors = api.load("glove-wiki-gigaword-50")
# Select the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT).
# NOTE(review): this checkpoint is an SST-2 sentiment classifier — confirm it
# was actually fine-tuned for AI-text detection before trusting its scores.
_AI_DETECTOR_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(_AI_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_AI_DETECTOR_CHECKPOINT).to(device)
 
 
 
 
31
 
32
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Score *text* with the DistilBERT classifier and report the class-1
    softmax probability as an "AI-generated" likelihood string."""
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    # Softmax over the two classes; index 1 is treated as "AI-generated".
    ai_probability = torch.softmax(logits, dim=1)[0, 1].item()
    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
@@ -76,11 +79,13 @@ def paraphrase_with_spacy_nltk(text):
76
 
77
  return paraphrased_sentence
78
 
79
# Grammar correction function using GingerIt
def correct_grammar(text):
    """Run *text* through GingerIt and return the corrected string."""
    # GingerIt.parse returns a dict; the fixed sentence lives under 'result'.
    return GingerIt().parse(text)['result']
 
 
84
 
85
  # Combined function: Paraphrase -> Grammar Check
86
  def paraphrase_and_correct(text):
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
3
  import torch
4
  import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from gensim import downloader as api
 
9
 
10
  # Ensure necessary NLTK data is downloaded
11
  nltk.download('wordnet')
 
# Select the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model checkpoints, named once so the two from_pretrained calls per model
# cannot drift apart.
_AI_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
_GC_CHECKPOINT = "pszemraj/flan-t5-large-grammar-synthesis"

# Load AI Detector model and tokenizer from Hugging Face (DistilBERT).
tokenizer_ai = AutoTokenizer.from_pretrained(_AI_CHECKPOINT)
model_ai = AutoModelForSequenceClassification.from_pretrained(_AI_CHECKPOINT).to(device)

# Load the grammar correction model (seq2seq T5 fine-tuned for grammar synthesis).
tokenizer_gc = AutoTokenizer.from_pretrained(_GC_CHECKPOINT)
model_gc = AutoModelForSeq2SeqLM.from_pretrained(_GC_CHECKPOINT).to(device)
35
# AI detection function using DistilBERT
def detect_ai_generated(text):
    """Estimate how likely *text* is AI-generated.

    Args:
        text: Input passage to score (truncated to 512 tokens).

    Returns:
        A human-readable string with the class-1 probability rendered as a
        percentage.
    """
    inputs = tokenizer_ai(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model_ai(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    # Probability of class 1, treated here as "AI-generated".
    # NOTE(review): the checkpoint is an SST-2 sentiment model — confirm that
    # class 1 really means "AI-generated" for this application.
    ai_probability = probabilities[0][1].item()
    # Bug fix: ai_probability is in [0, 1]; scale by 100 so the "%" suffix is
    # honest (0.85 must render as "85.00%", not "0.85%").
    return f"AI-Generated Content Probability: {ai_probability * 100:.2f}%"
 
79
 
80
  return paraphrased_sentence
81
 
82
# Grammar correction function using the T5 model
def correct_grammar(text):
    """Correct grammar in *text* with the flan-t5 grammar-synthesis model.

    Args:
        text: Sentence or passage to correct (truncated to 512 tokens).

    Returns:
        The decoded, corrected text.
    """
    inputs = tokenizer_gc(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        # Also pass the attention mask: generate() otherwise assumes all-ones,
        # which is wrong whenever the tokenizer emits padding.
        outputs = model_gc.generate(
            inputs["input_ids"],
            attention_mask=inputs.get("attention_mask"),
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )
    corrected_text = tokenizer_gc.decode(outputs[0], skip_special_tokens=True)
    return corrected_text
89
 
90
  # Combined function: Paraphrase -> Grammar Check
91
  def paraphrase_and_correct(text):