sashtech committed on
Commit
19c632c
·
verified ·
1 Parent(s): 486bbd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -275
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import gradio as grimport os
3
  import gradio as gr
4
  from transformers import pipeline
5
  import spacy
@@ -247,280 +246,7 @@ def paraphrase_and_correct(text):
247
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
248
 
249
  # Force capitalization of the first letter of each sentence
250
- final_text = force_first_letter_capital(paraphrased_text)
251
-
252
- return final_text
253
-
254
# Gradio callback: runs both pipelines over a single text input.
def process_text(input_text):
    """Return (detector label, detector score, corrected text) for *input_text*."""
    label, score = predict_en(input_text)
    corrected = paraphrase_and_correct(input_text)
    return label, score, corrected
259
-
260
# Create Gradio interface
# Wires process_text into a simple UI: one text input, three outputs
# (detector label, detector score, corrected text).
iface = gr.Interface(
    fn=process_text,
    inputs="text",
    outputs=["text", "number", "text"],
    title="AI Content Detection and Grammar Correction",
    description="Enter text to detect AI-generated content and correct grammar."
)

# Launch the Gradio app
# Only starts the server when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()
272
-
273
- from transformers import pipeline
274
- import spacy
275
- import subprocess
276
- import nltk
277
- from nltk.corpus import wordnet
278
- from spellchecker import SpellChecker
279
- import re
280
-
281
# --- Model and resource initialization (runs at import time) ---
import sys  # locate the current interpreter for the spaCy download fallback

# English text-classification pipeline used for AI-generated-text detection.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Spell checker used by correct_spelling().
spell = SpellChecker()

# WordNet data needed by the synonym helpers.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load the small English spaCy model, downloading it on first run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use sys.executable rather than a bare "python" so the download runs in
    # the same interpreter/virtualenv that is executing this app.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")
297
-
298
# AI-content detection for English text.
def predict_en(text):
    """Classify *text* with the detector pipeline; return (label, score)."""
    result = pipeline_en(text)[0]
    return result['label'], result['score']
302
-
303
# Synonym lookup backed by NLTK WordNet.
def get_synonyms_nltk(word, pos):
    """Return lemma names of the first WordNet synset for *word*, or [] if none."""
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    return [lemma.name() for lemma in matches[0].lemmas()]
310
-
311
# Filler-word removal pass.
def remove_redundant_words(text):
    """Strip common filler words ("actually", "basically", ...) from *text*."""
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    doc = nlp(text)
    kept = [tok.text for tok in doc if tok.text.lower() not in fillers]
    return ' '.join(kept)
317
-
318
# Capitalization pass driven by spaCy sentence segmentation and POS tags.
def capitalize_sentences_and_nouns(text):
    """Capitalize the first word of each sentence and every proper noun."""
    doc = nlp(text)
    rebuilt_sentences = []
    for sent in doc.sents:
        words = []
        for tok in sent:
            # Sentence-initial words and proper nouns get a capital first letter.
            if tok.i == sent.start or tok.pos_ == "PROPN":
                words.append(tok.text.capitalize())
            else:
                words.append(tok.text)
        rebuilt_sentences.append(' '.join(words))
    return ' '.join(rebuilt_sentences)
335
-
336
# Final safety pass: sentence-initial capitals.
def force_first_letter_capital(text):
    """Upper-case the first character of each ". "-delimited sentence."""
    fixed = []
    for chunk in text.split(". "):
        # chunk[:1] is "" for an empty chunk, so this is safe on "".
        fixed.append(chunk[:1].capitalize() + chunk[1:])
    return ". ".join(fixed)
341
-
342
# Tense normalization for auxiliary verbs.
def correct_tense_errors(text):
    """Replace auxiliary verb tokens with their WordNet base form."""
    doc = nlp(text)
    out = []
    for tok in doc:
        is_aux_verb = tok.pos_ == "VERB" and tok.dep_ in {"aux", "auxpass"}
        if is_aux_verb:
            # Fall back to the surface form when WordNet has no lemma.
            out.append(wordnet.morphy(tok.text, wordnet.VERB) or tok.text)
        else:
            out.append(tok.text)
    return ' '.join(out)
353
-
354
# Heuristic noun-number correction.
def correct_singular_plural_errors(text):
    """Fix noun number based on quantifiers/articles attached to the noun's head.

    Fix over the original: NOUN tokens whose fine tag is neither NN nor NNS
    were silently dropped from the output; they are now preserved verbatim.
    """
    doc = nlp(text)
    out = []
    for tok in doc:
        if tok.pos_ != "NOUN":
            out.append(tok.text)
            continue
        siblings = [child.text.lower() for child in tok.head.children]
        if tok.tag_ == "NN":
            # Singular noun under a plural quantifier -> pluralize the lemma.
            if any(w in ('many', 'several', 'few') for w in siblings):
                out.append(tok.lemma_ + 's')
            else:
                out.append(tok.text)
        elif tok.tag_ == "NNS":
            # Plural noun under a singular determiner -> use the bare lemma.
            if any(w in ('a', 'one') for w in siblings):
                out.append(tok.lemma_)
            else:
                out.append(tok.text)
        else:
            # Previously these tokens vanished from the result; keep them.
            out.append(tok.text)
    return ' '.join(out)
375
-
376
# a/an article correction.
def correct_article_errors(text):
    """Fix "a"/"an" choice based on the first letter of the following token.

    Fix over the original: `token.nbor(1)` raises IndexError when the article
    is the last token of the document; a bounds check now guards the lookup.
    """
    doc = nlp(text)
    out = []
    for tok in doc:
        if tok.text in ('a', 'an') and tok.i + 1 < len(doc):
            nxt = doc[tok.i + 1].text
            if tok.text == "a" and nxt[0].lower() in "aeiou":
                out.append("an")
            elif tok.text == "an" and nxt[0].lower() not in "aeiou":
                out.append("a")
            else:
                out.append(tok.text)
        else:
            # Not an article, or article is the final token (nothing to inspect).
            out.append(tok.text)
    return ' '.join(out)
392
-
393
# Single-token synonym replacement with crude re-inflection.
def replace_with_synonym(token):
    """Swap a spaCy *token* for its first WordNet synonym, re-inflected by suffix."""
    pos_map = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    pos = pos_map.get(token.pos_)
    synonyms = get_synonyms_nltk(token.lemma_, pos)
    if not synonyms:
        return token.text
    synonym = synonyms[0]
    # Naive suffix-based inflection; does not handle stem changes (e.g. "run" -> "runing").
    if token.tag_ == "VBG":          # present participle
        synonym += 'ing'
    elif token.tag_ in ("VBD", "VBN"):  # past tense / past participle
        synonym += 'ed'
    elif token.tag_ == "VBZ":        # third-person singular present
        synonym += 's'
    return synonym
417
-
418
# Double-negative rewrite.
def correct_double_negatives(text):
    """Replace "not" with "always" when its head also governs a "never"."""
    doc = nlp(text)
    out = []
    for tok in doc:
        never_sibling = any(c.text.lower() == "never" for c in tok.head.children)
        if tok.text.lower() == "not" and never_sibling:
            out.append("always")
        else:
            out.append(tok.text)
    return ' '.join(out)
428
-
429
# Subject-verb number-agreement pass.
def ensure_subject_verb_agreement(text):
    """Insert a corrected verb form ahead of a subject whose verb disagrees in number."""
    doc = nlp(text)
    out = []
    for tok in doc:
        if tok.dep_ == "nsubj" and tok.head.pos_ == "VERB":
            # NOTE(review): this emits the corrected verb *before* the subject
            # while the original verb token is still emitted at its own position,
            # so mismatched sentences gain a duplicate verb -- confirm intended.
            if tok.tag_ == "NN" and tok.head.tag_ != "VBZ":
                out.append(tok.head.lemma_ + "s")  # singular subject -> -s verb
            elif tok.tag_ == "NNS" and tok.head.tag_ == "VBZ":
                out.append(tok.head.lemma_)  # plural subject -> base verb
        out.append(tok.text)
    return ' '.join(out)
441
-
442
# Spelling pass over whitespace-separated words.
def correct_spelling(text):
    """Run each word through the spell checker, keeping words it cannot fix."""
    fixed = []
    for word in text.split():
        suggestion = spell.correction(word)
        # spell.correction() can return None; keep the original word then.
        fixed.append(suggestion or word)
    return ' '.join(fixed)
450
-
451
# Punctuation spacing normalization.
def correct_punctuation(text):
    """Normalize spacing around punctuation: none before, exactly one after."""
    no_space_before = re.sub(r'\s+([?.!,";:])', r'\1', text)
    single_space_after = re.sub(r'([?.!,";:])\s+', r'\1 ', no_space_before)
    return single_space_after
456
-
457
# Possessive-form handling.
def handle_possessives(text):
    """Pass possessive forms ("dog's") through unchanged."""
    # NOTE(review): the substitution rewrites \w+'s to itself, so this is an
    # identity transform; kept as-is for pipeline/interface compatibility.
    return re.sub(r"\b(\w+)'s\b", r"\1's", text)
461
-
462
# Whole-text synonym substitution pass.
def rephrase_with_synonyms(text):
    """Replace nouns/verbs/adjectives/adverbs with their first WordNet synonym."""
    pos_lookup = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    doc = nlp(text)
    pieces = []
    for tok in doc:
        pos_tag = pos_lookup.get(tok.pos_)
        if pos_tag is None:
            pieces.append(tok.text)
            continue
        # Lookup uses the surface form, matching the original behavior.
        synonyms = get_synonyms_nltk(tok.text, pos_tag)
        if not synonyms:
            pieces.append(tok.text)
            continue
        replacement = synonyms[0]  # first synonym only, for simplicity
        if tok.pos_ == "VERB":
            # Crude suffix-based re-inflection of the synonym.
            if tok.tag_ == "VBG":          # present participle
                replacement += 'ing'
            elif tok.tag_ in ("VBD", "VBN"):  # past tense / past participle
                replacement += 'ed'
            elif tok.tag_ == "VBZ":        # third-person singular present
                replacement += 's'
        elif tok.pos_ == "NOUN" and tok.tag_ == "NNS" and not replacement.endswith('s'):
            replacement += 's'             # keep plural nouns plural
        pieces.append(replacement)
    return ' '.join(pieces)
498
-
499
# Top-level correction pipeline.
def paraphrase_and_correct(text):
    """Run the full cleanup/rephrasing pipeline over *text* and return the result."""
    # Order matters: later passes assume earlier normalization has happened.
    stages = (
        remove_redundant_words,          # drop filler words first
        capitalize_sentences_and_nouns,  # sentence/proper-noun capitals
        correct_tense_errors,
        correct_singular_plural_errors,
        correct_article_errors,
        correct_double_negatives,
        ensure_subject_verb_agreement,
        correct_spelling,
        correct_punctuation,
        handle_possessives,
        rephrase_with_synonyms,
    )
    result = text
    for stage in stages:
        result = stage(result)
    # Guarantee sentence-initial capitals in the final output.
    final_text = force_first_letter_capital(result)

    return final_text
526
 
 
1
  import os
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
 
246
  paraphrased_text = rephrase_with_synonyms(paraphrased_text)
247
 
248
  # Force capitalization of the first letter of each sentence
249
+ final_text = capitalize_sentences_and_nouns(paraphrased_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  return final_text
252