|
import types |
|
import nltk |
|
from nltk.corpus import stopwords |
|
from nltk.corpus import wordnet |
|
nltk.download('wordnet') |
|
import pandas as pd |
|
import random |
|
|
|
|
|
class WordReplacer(object):
    """Replace words of a sentence with WordNet synonyms or antonyms.

    The methods use the module-level NLTK corpus readers ``wordnet`` and
    ``stopwords``.
    NOTE(review): this file only downloads the 'wordnet' corpus at import
    time; the 'stopwords' corpus presumably has to be installed separately
    before ``sentence_replacement`` can run -- confirm.
    """

    # Characters kept when normalising a synonym (lowercase ASCII and space).
    _ALLOWED_CHARS = frozenset(" abcdefghijklmnopqrstuvwxyz")

    def get_antonyms(self, word, pos=None):
        """Return the WordNet antonyms of ``word``.

        Args:
            word: Word to look up.
            pos: Optional part-of-speech filter, forwarded unchanged to
                ``wordnet.synsets``.

        Returns:
            A sorted list of distinct antonym lemma names exactly as WordNet
            spells them (underscores are kept), never containing ``word``
            itself. Empty when no antonym is known.
        """
        antonyms = {
            antonym.name()
            for syn in wordnet.synsets(word, pos=pos)
            for lemma in syn.lemmas()
            for antonym in lemma.antonyms()
        }
        antonyms.discard(word)
        # Sorted so that a seeded random.choice over the result is
        # reproducible; set iteration order changes between interpreter runs.
        return sorted(antonyms)

    def get_synonyms(self, word):
        """Return the WordNet synonyms of ``word``.

        Each lemma name is lowercased, ``_`` and ``-`` become spaces, and any
        character other than a-z or space is dropped.

        Args:
            word: Word to look up.

        Returns:
            A sorted list of distinct, non-empty normalised synonyms that
            excludes ``word`` (compared case-insensitively). Empty when no
            synonym is known.
        """
        synonyms = set()
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                name = lemma.name().replace("_", " ").replace("-", " ").lower()
                kept = "".join(ch for ch in name if ch in self._ALLOWED_CHARS)
                # Collapse whitespace left behind by dropped characters and
                # skip lemmas that end up empty (e.g. purely numeric names);
                # an empty replacement would silently delete the word.
                kept = " ".join(kept.split())
                if kept:
                    synonyms.add(kept)
        # Synonyms are lowercased, so the lowercase form must be removed too
        # or "Happy" could be "replaced" by "happy".
        synonyms.discard(word)
        synonyms.discard(word.lower())
        return sorted(synonyms)

    def sentence_replacement(self, words, n, types=""):
        """Replace up to ``n`` distinct non-stopwords of a sentence.

        Args:
            words: Sentence as a single string; it is split on whitespace.
            n: Maximum number of distinct words to replace. Every occurrence
                of a chosen word receives the same replacement. ``n <= 0``
                replaces nothing.
            types: ``"antonyms"`` or ``"synonyms"`` (case-insensitive). Any
                other value leaves the words untouched. (The name shadows the
                ``types`` module inside this method; it is kept because it is
                part of the public signature.)

        Returns:
            The sentence re-joined with single spaces.
        """
        tokens = words.split()
        lookups = {
            "antonyms": self.get_antonyms,
            "synonyms": self.get_synonyms,
        }
        lookup = lookups.get(types.lower())
        # Nothing to do: unknown mode, no replacement budget, or no words.
        # Returning here also avoids loading the stopword corpus needlessly.
        if lookup is None or n <= 0 or not tokens:
            return " ".join(tokens)

        # Load the stopword list once instead of once per word.
        stop_words = set(stopwords.words("english"))
        # dict.fromkeys de-duplicates while keeping sentence order, so the
        # shuffle below is reproducible under random.seed. The stopword test
        # is case-insensitive so that "The" or "I" are not treated as content
        # words.
        candidates = list(
            dict.fromkeys(t for t in tokens if t.lower() not in stop_words)
        )
        random.shuffle(candidates)

        new_words = tokens
        num_replaced = 0
        for candidate in candidates:
            if num_replaced >= n:
                break
            options = lookup(candidate)
            if options:
                replacement = random.choice(options)
                new_words = [
                    replacement if token == candidate else token
                    for token in new_words
                ]
                num_replaced += 1

        return " ".join(new_words)
|
|
|
class WordSwapping(object):
    """Augment a sentence by randomly swapping the positions of its words."""

    @staticmethod
    def swap_word(new_words):
        """Swap two randomly chosen distinct positions of ``new_words``.

        The list is modified in place and also returned. The second index is
        drawn up to four times; if it collides with the first index every
        time, the list is returned without a swap.

        Args:
            new_words: List of words to shuffle.

        Returns:
            The same list object, possibly with two elements exchanged. Lists
            with fewer than two elements are returned untouched.
        """
        # With fewer than two words there is nothing to swap, and
        # random.randint(0, -1) would raise ValueError on an empty list.
        if len(new_words) < 2:
            return new_words

        last = len(new_words) - 1
        random_idx_1 = random.randint(0, last)
        random_idx_2 = random_idx_1
        counter = 0
        while random_idx_2 == random_idx_1:
            random_idx_2 = random.randint(0, last)
            counter += 1
            # Give up after a few collisions instead of looping indefinitely.
            if counter > 3:
                return new_words

        new_words[random_idx_1], new_words[random_idx_2] = (
            new_words[random_idx_2],
            new_words[random_idx_1],
        )
        return new_words

    @staticmethod
    def random_swap(words, n):
        """Apply ``n`` random word swaps to a sentence.

        Args:
            words: Sentence as a single string; it is split on whitespace.
            n: Number of swap attempts. Values <= 0 perform no swap.

        Returns:
            The sentence re-joined with single spaces, containing the same
            words in a possibly different order.
        """
        # split() already yields a fresh list, so no extra copy is required.
        new_words = words.split()
        for _ in range(n):
            new_words = WordSwapping.swap_word(new_words)
        return " ".join(new_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|