sashtech committed on
Commit
cdd9b28
·
verified ·
1 Parent(s): e9b5c2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -14
app.py CHANGED
@@ -5,8 +5,7 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- from gingerit.gingerit import GingerIt
9
-
10
  from gensim import downloader as api
11
 
12
  # Ensure necessary NLTK data is downloaded
@@ -30,15 +29,12 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
31
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
32
 
33
- # Initialize Gingerit for grammar correction
34
- def correct_grammar_with_gingerit(text):
35
- try:
36
- parser = GingerIt()
37
- result = parser.parse(text)
38
- return result['result']
39
- except Exception as e:
40
- print(f"Error in Gingerit grammar correction: {e}")
41
- return text # Return the original text if Gingerit fails
42
 
43
  # AI detection function using DistilBERT
44
  def detect_ai_generated(text):
@@ -57,7 +53,7 @@ def get_synonyms_nltk(word, pos):
57
  return [lemma.name() for lemma in lemmas]
58
  return []
59
 
60
- # Paraphrasing function using spaCy and NLTK with Gingerit grammar correction
61
  def paraphrase_with_spacy_nltk(text):
62
  doc = nlp(text)
63
  paraphrased_words = []
@@ -85,8 +81,8 @@ def paraphrase_with_spacy_nltk(text):
85
  # Join the words back into a sentence
86
  paraphrased_sentence = ' '.join(paraphrased_words)
87
 
88
- # Correct the grammar of the paraphrased sentence using Gingerit
89
- corrected_sentence = correct_grammar_with_gingerit(paraphrased_sentence)
90
 
91
  return corrected_sentence
92
 
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
+ import language_tool_python # Import language-tool-python for grammar correction
 
9
  from gensim import downloader as api
10
 
11
  # Ensure necessary NLTK data is downloaded
 
29
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
30
  model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
31
 
32
+ # Function to correct grammar using language-tool-python
33
+ def correct_grammar_with_language_tool(text):
34
+ tool = language_tool_python.LanguageTool('en-US')
35
+ matches = tool.check(text)
36
+ corrected_text = language_tool_python.utils.correct(text, matches)
37
+ return corrected_text
 
 
 
38
 
39
  # AI detection function using DistilBERT
40
  def detect_ai_generated(text):
 
53
  return [lemma.name() for lemma in lemmas]
54
  return []
55
 
56
+ # Paraphrasing function using spaCy and NLTK with grammar correction
57
  def paraphrase_with_spacy_nltk(text):
58
  doc = nlp(text)
59
  paraphrased_words = []
 
81
  # Join the words back into a sentence
82
  paraphrased_sentence = ' '.join(paraphrased_words)
83
 
84
+ # Correct the grammar of the paraphrased sentence using language-tool-python
85
+ corrected_sentence = correct_grammar_with_language_tool(paraphrased_sentence)
86
 
87
  return corrected_sentence
88