sashtech committed on
Commit
0b6e157
·
verified ·
1 Parent(s): 01902a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -70
app.py CHANGED
@@ -6,13 +6,6 @@ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
9
- from fastapi import FastAPI, HTTPException
10
- from pydantic import BaseModel
11
- import uvicorn
12
- import uuid # To generate unique link IDs
13
-
14
- # Initialize FastAPI app
15
- api_app = FastAPI()
16
 
17
  # Initialize the English text classification pipeline for AI detection
18
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -31,73 +24,239 @@ except OSError:
31
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
32
  nlp = spacy.load("en_core_web_sm")
33
 
34
- # Generate temporary link storage (could be database or in-memory store)
35
- temporary_links = {}
36
-
37
- # Define request models for FastAPI
38
- class TextRequest(BaseModel):
39
- text: str
40
-
41
  # Function to predict the label and score for English text (AI Detection)
42
  def predict_en(text):
43
  res = pipeline_en(text)[0]
44
  return res['label'], res['score']
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Function to paraphrase and correct grammar with enhanced accuracy
47
  def paraphrase_and_correct(text):
48
- # Here should go all the paraphrasing and grammar correction logic.
49
- return text # For now just return the input
50
-
51
- # API Endpoint to create a new temporary link for Gradio interface
52
- @api_app.post("/generate-link/")
53
- async def generate_temporary_link(task: str):
54
- # Check if the task is either 'ai-detection' or 'paraphrase'
55
- if task not in ["ai-detection", "paraphrase"]:
56
- raise HTTPException(status_code=400, detail="Invalid task type.")
57
-
58
- # Create a unique link using UUID
59
- link_id = str(uuid.uuid4())
60
-
61
- # Set up Gradio interface based on task
62
- if task == "ai-detection":
63
- with gr.Blocks() as demo:
64
- t1 = gr.Textbox(lines=5, label='Text')
65
- button1 = gr.Button("🤖 Predict!")
66
- label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
67
- score1 = gr.Textbox(lines=1, label='Prob')
68
-
69
- # Connect the prediction function to the button
70
- button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
71
-
72
- elif task == "paraphrase":
73
- with gr.Blocks() as demo:
74
- t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
75
- button2 = gr.Button("🔄 Paraphrase and Correct")
76
- result2 = gr.Textbox(lines=10, label='Corrected Text', placeholder="The corrected text will appear here...")
77
-
78
- # Connect the paraphrasing and correction function to the button
79
- button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
80
-
81
- # Launch Gradio and get the link
82
- demo_url = demo.launch(share=True, prevent_thread_lock=True)
83
-
84
- # Save the generated link in memory (temporary)
85
- temporary_links[link_id] = {"task": task, "url": demo_url}
86
-
87
- # Return the link to the user
88
- return {"link_id": link_id, "url": demo_url}
89
-
90
- # API Endpoint to get the status or result via the generated link
91
- @api_app.get("/get-link/{link_id}")
92
- async def get_temporary_link(link_id: str):
93
- # Check if the link exists
94
- if link_id not in temporary_links:
95
- raise HTTPException(status_code=404, detail="Link not found.")
96
-
97
- # Retrieve the link details
98
- link_details = temporary_links[link_id]
99
- return {"link": link_details["url"]}
100
-
101
- # Run the FastAPI app
102
- if __name__ == "__main__":
103
- uvicorn.run(api_app, host="0.0.0.0", port=8000)
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
  from spellchecker import SpellChecker
 
 
 
 
 
 
 
9
 
10
  # Initialize the English text classification pipeline for AI detection
11
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
24
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
25
  nlp = spacy.load("en_core_web_sm")
26
 
 
 
 
 
 
 
 
27
  # Function to predict the label and score for English text (AI Detection)
28
  def predict_en(text):
29
  res = pipeline_en(text)[0]
30
  return res['label'], res['score']
31
 
32
+ # Function to get synonyms using NLTK WordNet
33
+ def get_synonyms_nltk(word, pos):
34
+ synsets = wordnet.synsets(word, pos=pos)
35
+ if synsets:
36
+ lemmas = synsets[0].lemmas()
37
+ return [lemma.name() for lemma in lemmas]
38
+ return []
39
+
40
+ # Function to remove redundant and meaningless words
41
+ def remove_redundant_words(text):
42
+ doc = nlp(text)
43
+ meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
44
+ filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
45
+ return ' '.join(filtered_text)
46
+
47
+ # Function to capitalize the first letter of sentences and proper nouns
48
+ def capitalize_sentences_and_nouns(text):
49
+ doc = nlp(text)
50
+ corrected_text = []
51
+
52
+ for sent in doc.sents:
53
+ sentence = []
54
+ for token in sent:
55
+ if token.i == sent.start: # First word of the sentence
56
+ sentence.append(token.text.capitalize())
57
+ elif token.pos_ == "PROPN": # Proper noun
58
+ sentence.append(token.text.capitalize())
59
+ else:
60
+ sentence.append(token.text)
61
+ corrected_text.append(' '.join(sentence))
62
+
63
+ return '\n'.join(corrected_text) # Preserve paragraphs by joining sentences with newline
64
+
65
+ # Function to force capitalization of the first letter of every sentence
66
+ def force_first_letter_capital(text):
67
+ sentences = text.split(". ") # Split by period to get each sentence
68
+ capitalized_sentences = [sentence[0].capitalize() + sentence[1:] if sentence else "" for sentence in sentences]
69
+ return ". ".join(capitalized_sentences)
70
+
71
+ # Function to correct tense errors in a sentence
72
+ def correct_tense_errors(text):
73
+ doc = nlp(text)
74
+ corrected_text = []
75
+ for token in doc:
76
+ if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
77
+ lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
78
+ corrected_text.append(lemma)
79
+ else:
80
+ corrected_text.append(token.text)
81
+ return ' '.join(corrected_text)
82
+
83
+ # Function to correct singular/plural errors
84
+ def correct_singular_plural_errors(text):
85
+ doc = nlp(text)
86
+ corrected_text = []
87
+
88
+ for token in doc:
89
+ if token.pos_ == "NOUN":
90
+ if token.tag_ == "NN": # Singular noun
91
+ if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
92
+ corrected_text.append(token.lemma_ + 's')
93
+ else:
94
+ corrected_text.append(token.text)
95
+ elif token.tag_ == "NNS": # Plural noun
96
+ if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
97
+ corrected_text.append(token.lemma_)
98
+ else:
99
+ corrected_text.append(token.text)
100
+ else:
101
+ corrected_text.append(token.text)
102
+
103
+ return ' '.join(corrected_text)
104
+
105
+ # Function to check and correct article errors
106
+ def correct_article_errors(text):
107
+ doc = nlp(text)
108
+ corrected_text = []
109
+ for token in doc:
110
+ if token.text in ['a', 'an']:
111
+ next_token = token.nbor(1)
112
+ if token.text == "a" and next_token.text[0].lower() in "aeiou":
113
+ corrected_text.append("an")
114
+ elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
115
+ corrected_text.append("a")
116
+ else:
117
+ corrected_text.append(token.text)
118
+ else:
119
+ corrected_text.append(token.text)
120
+ return ' '.join(corrected_text)
121
+
122
+ # Function to get the correct synonym while maintaining verb form
123
+ def replace_with_synonym(token):
124
+ pos = None
125
+ if token.pos_ == "VERB":
126
+ pos = wordnet.VERB
127
+ elif token.pos_ == "NOUN":
128
+ pos = wordnet.NOUN
129
+ elif token.pos_ == "ADJ":
130
+ pos = wordnet.ADJ
131
+ elif token.pos_ == "ADV":
132
+ pos = wordnet.ADV
133
+
134
+ synonyms = get_synonyms_nltk(token.lemma_, pos)
135
+
136
+ if synonyms:
137
+ synonym = synonyms[0]
138
+ if token.tag_ == "VBG": # Present participle (e.g., running)
139
+ synonym = synonym + 'ing'
140
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
141
+ synonym = synonym + 'ed'
142
+ elif token.tag_ == "VBZ": # Third-person singular present
143
+ synonym = synonym + 's'
144
+ return synonym
145
+ return token.text
146
+
147
+ # Function to check for and avoid double negatives
148
+ def correct_double_negatives(text):
149
+ doc = nlp(text)
150
+ corrected_text = []
151
+ for token in doc:
152
+ if token.text.lower() == "not" and any(child.text.lower() == "never" for child in token.head.children):
153
+ corrected_text.append("always")
154
+ else:
155
+ corrected_text.append(token.text)
156
+ return ' '.join(corrected_text)
157
+
158
+ # Function to ensure subject-verb agreement
159
+ def ensure_subject_verb_agreement(text):
160
+ doc = nlp(text)
161
+ corrected_text = []
162
+ for token in doc:
163
+ if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
164
+ if token.tag_ == "NN" and token.head.tag_ != "VBZ": # Singular noun, should use singular verb
165
+ corrected_text.append(token.head.lemma_ + "s")
166
+ elif token.tag_ == "NNS" and token.head.tag_ == "VBZ": # Plural noun, should not use singular verb
167
+ corrected_text.append(token.head.lemma_)
168
+ corrected_text.append(token.text)
169
+ return ' '.join(corrected_text)
170
+
171
+ # Function to correct spelling errors
172
+ def correct_spelling(text):
173
+ words = text.split()
174
+ corrected_words = []
175
+ for word in words:
176
+ corrected_word = spell.correction(word)
177
+ corrected_words.append(corrected_word)
178
+ return ' '.join(corrected_words)
179
+
180
+ # Function to rephrase text and replace words with their synonyms while maintaining form
181
+ def rephrase_with_synonyms(text):
182
+ doc = nlp(text)
183
+ rephrased_text = []
184
+
185
+ for token in doc:
186
+ pos_tag = None
187
+ if token.pos_ == "NOUN":
188
+ pos_tag = wordnet.NOUN
189
+ elif token.pos_ == "VERB":
190
+ pos_tag = wordnet.VERB
191
+ elif token.pos_ == "ADJ":
192
+ pos_tag = wordnet.ADJ
193
+ elif token.pos_ == "ADV":
194
+ pos_tag = wordnet.ADV
195
+
196
+ if pos_tag:
197
+ synonyms = get_synonyms_nltk(token.text, pos_tag)
198
+ if synonyms:
199
+ synonym = synonyms[0] # Just using the first synonym for simplicity
200
+ if token.pos_ == "VERB":
201
+ if token.tag_ == "VBG": # Present participle (e.g., running)
202
+ synonym = synonym + 'ing'
203
+ elif token.tag_ == "VBD" or token.tag_ == "VBN": # Past tense or past participle
204
+ synonym = synonym + 'ed'
205
+ elif token.tag_ == "VBZ": # Third-person singular present
206
+ synonym = synonym + 's'
207
+ elif token.pos_ == "NOUN" and token.tag_ == "NNS": # Plural nouns
208
+ synonym += 's' if not synonym.endswith('s') else ""
209
+ rephrased_text.append(synonym)
210
+ else:
211
+ rephrased_text.append(token.text)
212
+ else:
213
+ rephrased_text.append(token.text)
214
+
215
+ return ' '.join(rephrased_text)
216
+
217
  # Function to paraphrase and correct grammar with enhanced accuracy
218
  def paraphrase_and_correct(text):
219
+ # Remove meaningless or redundant words first
220
+ cleaned_text = remove_redundant_words(text)
221
+
222
+ # Capitalize sentences and nouns
223
+ paraphrased_text = capitalize_sentences_and_nouns(cleaned_text)
224
+
225
+ # Ensure first letter of each sentence is capitalized
226
+ paraphrased_text = force_first_letter_capital(paraphrased_text)
227
+
228
+ # Apply grammatical corrections
229
+ paraphrased_text = correct_article_errors(paraphrased_text)
230
+ paraphrased_text = correct_singular_plural_errors(paraphrased_text)
231
+ paraphrased_text = correct_tense_errors(paraphrased_text)
232
+ paraphrased_text = correct_double_negatives(paraphrased_text)
233
+ paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
234
+
235
+ # Rephrase with synonyms while maintaining grammatical forms
236
+ paraphrased_text = rephrase_with_synonyms(paraphrased_text)
237
+
238
+ # Correct spelling errors
239
+ paraphrased_text = correct_spelling(paraphrased_text)
240
+
241
+ return paraphrased_text
242
+
243
+ # Gradio app setup with two tabs
244
+ with gr.Blocks() as demo:
245
+ with gr.Tab("AI Detection"):
246
+ t1 = gr.Textbox(lines=5, label='Text')
247
+ button1 = gr.Button("🤖 Predict!")
248
+ label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
249
+ score1 = gr.Textbox(lines=1, label='Prob')
250
+
251
+ # Connect the prediction function to the button
252
+ button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
253
+
254
+ with gr.Tab("Paraphrasing & Grammar Correction"):
255
+ t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
256
+ button2 = gr.Button("🔄 Paraphrase and Correct")
257
+ result2 = gr.Textbox(lines=10, label='Corrected Text', placeholder="The corrected text will appear here...")
258
+
259
+ # Connect the paraphrasing and correction function to the button
260
+ button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
261
+
262
+ demo.launch(share=True) # Share=True to create a public link