sashtech committed on
Commit
0b9eac5
·
verified ·
1 Parent(s): a1de069

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -5,7 +5,7 @@ import spacy
5
  import subprocess
6
  import json
7
  import nltk
8
- from nltk.corpus import wordnet, stopwords # Import stopwords here
9
  from spellchecker import SpellChecker
10
  import re
11
  import random
@@ -13,24 +13,22 @@ import string
13
 
14
  # Ensure necessary NLTK data is downloaded
15
  def download_nltk_resources():
16
- try:nltk.download('punkt')
 
17
  nltk.download('stopwords')
18
  nltk.download('averaged_perceptron_tagger')
19
- nltk.download('averaged_perceptron_tagger_eng')
20
  nltk.download('wordnet')
21
  nltk.download('omw-1.4')
22
- nltk.download('punkt_tab')
23
-
24
  except Exception as e:
25
  print(f"Error downloading NLTK resources: {e}")
26
 
27
  # Call the download function
28
  download_nltk_resources()
29
 
30
- top_words = set(stopwords.words("english")) # More efficient as a set
31
 
32
  # Path to the thesaurus file
33
- thesaurus_file_path = 'en_thesaurus.jsonl (1).old' # Ensure the file path is correct
34
 
35
  # Function to load the thesaurus into a dictionary
36
  def load_thesaurus(file_path):
@@ -38,7 +36,6 @@ def load_thesaurus(file_path):
38
  try:
39
  with open(file_path, 'r', encoding='utf-8') as file:
40
  for line in file:
41
- # Parse each line as a JSON object
42
  entry = json.loads(line.strip())
43
  word = entry.get("word")
44
  synonyms = entry.get("synonyms", [])
@@ -77,7 +74,7 @@ def predict_en(text):
77
  except Exception as e:
78
  return f"Error during AI detection: {e}"
79
 
80
- # Modified plagiarism_remover function to use the loaded thesaurus
81
  def plagiarism_remover(word):
82
  if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
83
  return word
@@ -234,6 +231,7 @@ def paraphrase_and_correct(text):
234
  # Create the Gradio interface
235
  with gr.Blocks() as demo:
236
  gr.Markdown("# AI Text Processor")
 
237
  with gr.Tab("AI Detection"):
238
  t1 = gr.Textbox(lines=5, label='Input Text')
239
  btn1 = gr.Button("Detect AI")
 
5
  import subprocess
6
  import json
7
  import nltk
8
+ from nltk.corpus import wordnet, stopwords
9
  from spellchecker import SpellChecker
10
  import re
11
  import random
 
13
 
14
  # Ensure necessary NLTK data is downloaded
15
  def download_nltk_resources():
16
+ try:
17
+ nltk.download('punkt')
18
  nltk.download('stopwords')
19
  nltk.download('averaged_perceptron_tagger')
 
20
  nltk.download('wordnet')
21
  nltk.download('omw-1.4')
 
 
22
  except Exception as e:
23
  print(f"Error downloading NLTK resources: {e}")
24
 
25
  # Call the download function
26
  download_nltk_resources()
27
 
28
+ top_words = set(stopwords.words("english"))
29
 
30
  # Path to the thesaurus file
31
+ thesaurus_file_path = 'en_thesaurus.jsonl' # Ensure the file path is correct
32
 
33
  # Function to load the thesaurus into a dictionary
34
  def load_thesaurus(file_path):
 
36
  try:
37
  with open(file_path, 'r', encoding='utf-8') as file:
38
  for line in file:
 
39
  entry = json.loads(line.strip())
40
  word = entry.get("word")
41
  synonyms = entry.get("synonyms", [])
 
74
  except Exception as e:
75
  return f"Error during AI detection: {e}"
76
 
77
+ # Function to remove plagiarism
78
  def plagiarism_remover(word):
79
  if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
80
  return word
 
231
  # Create the Gradio interface
232
  with gr.Blocks() as demo:
233
  gr.Markdown("# AI Text Processor")
234
+
235
  with gr.Tab("AI Detection"):
236
  t1 = gr.Textbox(lines=5, label='Input Text')
237
  btn1 = gr.Button("Detect AI")