sashtech committed on
Commit
3e83484
·
verified ·
1 Parent(s): 90fff6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -118
app.py CHANGED
@@ -25,139 +25,65 @@ except OSError:
25
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
26
  nlp = spacy.load("en_core_web_sm")
27
 
28
# Look up WordNet synonyms for a word (Humanifier)
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset found for ``word``.

    ``pos`` is forwarded unchanged to ``wordnet.synsets``. An empty list is
    returned when the lookup yields no synsets.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    return [lemma.name() for lemma in matches[0].lemmas()]
 
 
 
 
 
 
 
 
35
 
36
# Replace content words with a synonym, trying to keep verb forms and plurals
def replace_with_synonyms(text):
    """Return ``text`` with verbs, nouns, adjectives and adverbs swapped for a synonym.

    The replacement chosen for a surface form is remembered, so repeated
    occurrences of the same word get the same substitute. Tokens of any other
    part of speech are copied through. The result is the tokens joined by
    single spaces.
    """
    wordnet_pos_by_spacy_pos = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    cache = {}
    pieces = []

    for token in nlp(text):
        surface = token.text
        spacy_pos = token.pos_

        # Only content words are candidates for replacement.
        if spacy_pos not in wordnet_pos_by_spacy_pos:
            pieces.append(surface)
            continue

        # Reuse an earlier decision for the same surface form.
        if surface in cache:
            pieces.append(cache[surface])
            continue

        candidates = get_synonyms_nltk(surface, wordnet_pos_by_spacy_pos[spacy_pos])
        if not candidates:
            # Nothing found: keep the word as is (this outcome is not cached).
            pieces.append(surface)
            continue

        replacement = candidates[0]
        if spacy_pos == "VERB" and replacement != token.lemma_:
            # `token._.inflect` is a custom token extension registered outside
            # this block (presumably by pyinflect -- confirm). Note that it is
            # applied to the original token, not to the synonym.
            replacement = token._.inflect(token.tag_)
        if spacy_pos == "NOUN" and token.tag_ == "NNS":
            # Plural noun: pluralize the synonym by appending "s".
            replacement += 's'

        cache[surface] = replacement
        pieces.append(replacement)

    return ' '.join(pieces)
78
 
79
# Capitalize the first letter of sentences and proper nouns (Humanifier)
def _capitalize_first(word):
    """Upper-case the first character of ``word`` and leave the rest untouched."""
    return word[:1].upper() + word[1:]


def capitalize_sentences_and_nouns(text):
    """Return ``text`` with sentence-initial tokens and proper nouns capitalized.

    The text is parsed with ``nlp``; within each sentence the first token and
    every token tagged ``PROPN`` gets an upper-case first letter. Tokens are
    joined by single spaces, and sentences are joined the same way.

    Only the first character is changed. ``str.capitalize()`` would also
    lower-case the remainder of the token, turning "NASA" into "Nasa" and
    "McDonald" into "Mcdonald".
    """
    doc = nlp(text)
    sentences = []

    for sent in doc.sents:
        words = [
            _capitalize_first(token.text)
            if token.i == sent.start or token.pos_ == "PROPN"
            else token.text
            for token in sent
        ]
        sentences.append(' '.join(words))

    return ' '.join(sentences)
96
-
97
# Paraphrase the text and then apply the grammar corrections
def paraphrase_and_correct(text):
    """Run ``text`` through the capitalization, synonym and grammar stages in order.

    Each stage takes a string and returns a string; the output of one stage is
    the input of the next.
    """
    stages = (
        capitalize_sentences_and_nouns,  # first, so proper nouns are capitalized
        replace_with_synonyms,
        correct_article_errors,
        correct_singular_plural_errors,
        correct_tense_errors,
    )

    result = text
    for stage in stages:
        result = stage(result)
    return result
110
-
111
# Correct article errors
def correct_article_errors(text):
    """Return ``text`` with "a"/"an" chosen by the first letter of the next token.

    "a" before a token starting with a vowel letter becomes "an", and "an"
    before any other token becomes "a". Only the lower-case forms "a" and "an"
    are considered. Tokens are joined by single spaces.

    An article that is the last token of the text is left unchanged; asking
    for its right-hand neighbour with ``token.nbor(1)`` would raise
    ``IndexError``.
    """
    doc = nlp(text)
    last_index = len(doc) - 1
    corrected_text = []

    for token in doc:
        word = token.text
        if word in ("a", "an") and token.i < last_index:
            following = doc[token.i + 1].text
            # The choice is made on spelling only (first letter), not on pronunciation.
            word = "an" if following[0].lower() in "aeiou" else "a"
        corrected_text.append(word)

    return ' '.join(corrected_text)
127
-
128
# Correct singular/plural errors
def correct_singular_plural_errors(text):
    """Return ``text`` with noun number adjusted to nearby quantity words.

    A singular noun (tag ``NN``) becomes ``lemma + "s"`` when "many",
    "several" or "few" is found; a plural noun (tag ``NNS``) becomes its lemma
    when "a" or "one" is found. Every other token is copied through unchanged,
    including nouns whose tag is neither ``NN`` nor ``NNS`` (these were
    previously dropped from the output). Tokens are joined by single spaces.
    """
    plural_cues = {"many", "several", "few"}
    singular_cues = {"a", "one"}
    corrected_text = []

    for token in nlp(text):
        word = token.text
        if token.pos_ == "NOUN":
            # NOTE(review): the cue words are searched among the children of the
            # noun's head, i.e. the noun's siblings. A determiner or quantifier
            # modifying the noun itself would normally be in `token.children`;
            # confirm which one is intended.
            nearby = {child.text.lower() for child in token.head.children}
            if token.tag_ == "NN" and nearby & plural_cues:
                word = token.lemma_ + 's'
            elif token.tag_ == "NNS" and nearby & singular_cues:
                word = token.lemma_
        corrected_text.append(word)

    return ' '.join(corrected_text)
149
-
150
# Correct tense errors in verbs
def correct_tense_errors(text):
    """Return ``text`` with auxiliary verbs reduced to their WordNet base form.

    A token is rewritten only when its part of speech is ``VERB`` and its
    dependency label is ``aux`` or ``auxpass``; if ``wordnet.morphy`` finds no
    base form the token text is kept. Tokens are joined by single spaces.
    """
    auxiliary_deps = {"aux", "auxpass"}

    def base_form(token):
        if token.pos_ == "VERB" and token.dep_ in auxiliary_deps:
            return wordnet.morphy(token.text, wordnet.VERB) or token.text
        return token.text

    return ' '.join(base_form(token) for token in nlp(text))
161
 
162
  # Gradio app setup with two tabs
163
  with gr.Blocks() as demo:
 
25
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
26
  nlp = spacy.load("en_core_web_sm")
27
 
28
# Function to get a synonym using NLTK WordNet, matching part of speech and letter case
def get_synonym(word, pos_tag):
    """Return a WordNet synonym for ``word`` with the given part of speech.

    Args:
        word: The surface form to replace.
        pos_tag: A WordNet part-of-speech constant (e.g. ``wordnet.NOUN``).

    Returns:
        The first lemma name, in synset order, that differs from ``word``,
        with underscores replaced by spaces. It is lower-cased when ``word``
        is lower-case and capitalized otherwise. ``word`` itself is returned
        when no such lemma exists.
    """
    original = word.lower()

    # Filtering inside the lookup (rather than comparing `synset.pos()` to
    # `pos_tag` afterwards) keeps satellite adjectives, whose `pos()` is "s"
    # and therefore never equals `wordnet.ADJ`.
    for synset in wordnet.synsets(word, pos=pos_tag):
        for lemma in synset.lemmas():
            # Multi-word lemma names use "_" as the separator.
            candidate = lemma.name().replace('_', ' ')
            if candidate.lower() == original:
                # The first lemma is frequently the word itself; that is not a synonym.
                continue
            if word.islower():
                return candidate.lower()
            return candidate.capitalize()

    return word
43
 
44
# Function to rephrase text and replace words with their synonyms while maintaining form
def rephrase_with_synonyms(text):
    """Return ``text`` with nouns, verbs, adjectives and adverbs replaced by synonyms.

    Tokens of any other part of speech are copied through unchanged.
    Past-tense verbs are also left unchanged, because the synonym returned by
    ``get_synonym`` is not inflected and would lose the tense. Plural nouns
    (tag ``NNS``) get an "s" appended to the synonym unless it already ends in
    "s". Tokens are joined by single spaces.
    """
    wordnet_pos_by_spacy_pos = {
        "NOUN": wordnet.NOUN,
        "VERB": wordnet.VERB,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    rephrased_text = []

    for token in nlp(text):
        pos_tag = wordnet_pos_by_spacy_pos.get(token.pos_)
        if pos_tag is None:
            rephrased_text.append(token.text)
            continue

        # `token.morph.get` returns a list of values, so the check must be a
        # membership test; comparing the list to the string "Past" is always False.
        if token.pos_ == "VERB" and "Past" in token.morph.get("Tense"):
            rephrased_text.append(token.text)
            continue

        synonym = get_synonym(token.text, pos_tag)
        if token.pos_ == "NOUN" and token.tag_ == "NNS" and not synonym.endswith('s'):
            synonym += 's'
        rephrased_text.append(synonym)

    return ' '.join(rephrased_text)
73
 
74
# Correct grammar first, then paraphrase with synonyms
def paraphrase_and_correct(text):
    """Return ``text`` after capitalization, grammar correction and synonym rephrasing.

    The stages run in this order: capitalization, article correction,
    singular/plural correction, tense correction, synonym replacement.
    """
    # NOTE(review): the four helpers called before `rephrase_with_synonyms`
    # are not defined in the part of the file visible here -- confirm they
    # still exist elsewhere in app.py.

    # Capitalize first to ensure proper noun capitalization.
    capitalized = capitalize_sentences_and_nouns(text)

    # Grammatical corrections.
    articles_fixed = correct_article_errors(capitalized)
    number_fixed = correct_singular_plural_errors(articles_fixed)
    tense_fixed = correct_tense_errors(number_fixed)

    # Synonym replacement runs last, on the corrected text.
    return rephrase_with_synonyms(tense_fixed)
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  # Gradio app setup with two tabs
89
  with gr.Blocks() as demo: