sashtech committed on
Commit
29e4a04
·
verified ·
1 Parent(s): 8ef58cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -84
app.py CHANGED
@@ -6,6 +6,12 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
 
 
 
 
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -24,19 +30,15 @@ except OSError:
24
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
25
  nlp = spacy.load("en_core_web_sm")
26
 
 
 
 
 
27
  # Function to predict the label and score for English text (AI Detection)
28
  def predict_en(text):
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
- # Function to get synonyms using NLTK WordNet
33
- def get_synonyms_nltk(word, pos):
34
- synsets = wordnet.synsets(word, pos=pos)
35
- if synsets:
36
- lemmas = synsets[0].lemmas()
37
- return [lemma.name() for lemma in lemmas]
38
- return []
39
-
40
  # Function to remove redundant and meaningless words
41
  def remove_redundant_words(text):
42
  doc = nlp(text)
@@ -102,72 +104,6 @@ def correct_singular_plural_errors(text):
102
 
103
  return ' '.join(corrected_text)
104
 
105
- # Function to check and correct article errors
106
- def correct_article_errors(text):
107
- doc = nlp(text)
108
- corrected_text = []
109
- for token in doc:
110
- if token.text in ['a', 'an']:
111
- next_token = token.nbor(1)
112
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
113
- corrected_text.append("an")
114
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
115
- corrected_text.append("a")
116
- else:
117
- corrected_text.append(token.text)
118
- else:
119
- corrected_text.append(token.text)
120
- return ' '.join(corrected_text)
121
-
122
- # Function to get the correct synonym while maintaining verb form
123
- def replace_with_synonym(token):
124
- pos = None
125
- if token.pos_ == "VERB":
126
- pos = wordnet.VERB
127
- elif token.pos_ == "NOUN":
128
- pos = wordnet.NOUN
129
- elif token.pos_ == "ADJ":
130
- pos = wordnet.ADJ
131
- elif token.pos_ == "ADV":
132
- pos = wordnet.ADV
133
-
134
- synonyms = get_synonyms_nltk(token.lemma_, pos)
135
-
136
- if synonyms:
137
- synonym = synonyms[0]
138
- if token.tag_ == "VBG": # Present participle (e.g., running)
139
- synonym = synonym + 'ing'
140
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
141
- synonym = synonym + 'ed'
142
- elif token.tag_ == "VBZ": # Third-person singular present
143
- synonym = synonym + 's'
144
- return synonym
145
- return token.text
146
-
147
- # Function to check for and avoid double negatives
148
- def correct_double_negatives(text):
149
- doc = nlp(text)
150
- corrected_text = []
151
- for token in doc:
152
- if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
153
- corrected_text.append("always")
154
- else:
155
- corrected_text.append(token.text)
156
- return ' '.join(corrected_text)
157
-
158
- # Function to ensure subject-verb agreement
159
- def ensure_subject_verb_agreement(text):
160
- doc = nlp(text)
161
- corrected_text = []
162
- for token in doc:
163
- if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
164
- if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
165
- corrected_text.append(token.head.lemma_ + "s")
166
- elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
167
- corrected_text.append(token.head.lemma_)
168
- corrected_text.append(token.text)
169
- return ' '.join(corrected_text)
170
-
171
  # Function to correct spelling errors
172
  def correct_spelling(text):
173
  words = text.split()
@@ -194,18 +130,16 @@ def rephrase_with_synonyms(text):
194
  pos_tag = wordnet.ADV
195
 
196
  if pos_tag:
197
- synonyms = get_synonyms_nltk(token.text, pos_tag)
198
  if synonyms:
199
- synonym = synonyms[0] # Just using the first synonym for simplicity
200
  if token.pos_ == "VERB":
201
- if token.tag_ == "VBG": # Present participle (e.g., running)
202
  synonym = synonym + 'ing'
203
- elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
204
  synonym = synonym + 'ed'
205
  elif token.tag_ == "VBZ": # Third-person singular present
206
  synonym = synonym + 's'
207
- elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
208
- synonym += 's' if not synonym.endswith('s') else ""
209
  rephrased_text.append(synonym)
210
  else:
211
  rephrased_text.append(token.text)
@@ -226,11 +160,8 @@ def paraphrase_and_correct(text):
226
  paraphrased_text = force_first_letter_capital(paraphrased_text)
227
 
228
  # Apply grammatical corrections
229
- paraphrased_text = correct_article_errors(paraphrased_text)
230
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
231
  paraphrased_text = correct_tense_errors(paraphrased_text)
232
- paraphrased_text = correct_double_negatives(paraphrased_text)
233
- paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
234
 
235
  # Rephrase with synonyms while maintaining grammatical forms
236
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
@@ -240,6 +171,18 @@ def paraphrase_and_correct(text):
240
 
241
  return paraphrased_text
242
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  # Gradio app setup with two tabs
244
  with gr.Blocks() as demo:
245
  with gr.Tab("AI Detection"):
@@ -259,4 +202,9 @@ with gr.Blocks() as demo:
259
  # Connect the paraphrasing and correction function to the button
260
  button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
261
 
262
- demo.launch(share=True) # Share=True to create a public link
 
 
 
 
 
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
+ from fastapi import FastAPI
10
+ from pydantic import BaseModel
11
+ import uvicorn
12
+
13
+ # Initialize FastAPI app
14
+ api_app = FastAPI()
15
 
16
  # Initialize the English text classification pipeline for AI detection
17
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
30
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
31
  nlp = spacy.load("en_core_web_sm")
32
 
33
+ # Define request models for FastAPI
34
+ class TextRequest(BaseModel):
35
+ text: str
36
+
37
  # Function to predict the label and score for English text (AI Detection)
38
  def predict_en(text):
39
  res = pipeline_en(text)[0]
40
  return res['label'], res['score']
41
 
 
 
 
 
 
 
 
 
42
  # Function to remove redundant and meaningless words
43
  def remove_redundant_words(text):
44
  doc = nlp(text)
 
104
 
105
  return ' '.join(corrected_text)
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # Function to correct spelling errors
108
  def correct_spelling(text):
109
  words = text.split()
 
130
  pos_tag = wordnet.ADV
131
 
132
  if pos_tag:
133
+ synonyms = wordnet.synsets(token.lemma_, pos=pos_tag)
134
  if synonyms:
135
+ synonym = synonyms[0].lemmas()[0].name()
136
  if token.pos_ == "VERB":
137
+ if token.tag_ == "VBG": # Present participle
138
  synonym = synonym + 'ing'
139
+ elif token.tag_ in {"VBD", "VBN"}: # Past tense or past participle
140
  synonym = synonym + 'ed'
141
  elif token.tag_ == "VBZ": # Third-person singular present
142
  synonym = synonym + 's'
 
 
143
  rephrased_text.append(synonym)
144
  else:
145
  rephrased_text.append(token.text)
 
160
  paraphrased_text = force_first_letter_capital(paraphrased_text)
161
 
162
  # Apply grammatical corrections
 
163
  paraphrased_text = correct_singular_plural_errors(paraphrased_text)
164
  paraphrased_text = correct_tense_errors(paraphrased_text)
 
 
165
 
166
  # Rephrase with synonyms while maintaining grammatical forms
167
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
 
171
 
172
  return paraphrased_text
173
 
174
+ # FastAPI Endpoint for AI detection
175
+ @api_app.post("/ai-detection")
176
+ async def ai_detection(request: TextRequest):
177
+ label, score = predict_en(request.text)
178
+ return {"label": label, "score": score}
179
+
180
+ # FastAPI Endpoint for paraphrasing and grammar correction
181
+ @api_app.post("/paraphrase")
182
+ async def paraphrase(request: TextRequest):
183
+ corrected_text = paraphrase_and_correct(request.text)
184
+ return {"corrected_text": corrected_text}
185
+
186
  # Gradio app setup with two tabs
187
  with gr.Blocks() as demo:
188
  with gr.Tab("AI Detection"):
 
202
  # Connect the paraphrasing and correction function to the button
203
  button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
204
 
205
+ # Launch the Gradio app
206
+ demo.launch(share=True)
207
+
208
+ # Run the FastAPI app with uvicorn when this file is executed as a script (no separate thread is created here)
209
+ if __name__ == "__main__":
210
+ uvicorn.run(api_app, host="0.0.0.0", port=8000)