Spaces:
Sleeping
Sleeping
""" | |
Word Merge | |
------------------------------------------------ | |
Word Merge transformations act by taking two adjacent words, and "merges" them into one word by deleting one word and replacing another. | |
For example, if we can merge the words "the" and "movie" in the text "I like the movie" and get following text: "I like film". | |
When we choose to "merge" word at index ``i``, we merge it with the next word at ``i+1``. | |
""" | |
from textattack.transformations import Transformation | |
class WordMerge(Transformation): | |
"""An abstract class for word merges.""" | |
def __call__( | |
self, | |
current_text, | |
pre_transformation_constraints=[], | |
indices_to_modify=None, | |
shifted_idxs=True, | |
): | |
"""Returns a list of all possible transformations for ``current_text``. | |
Applies the ``pre_transformation_constraints`` then calls | |
``_get_transformations``. | |
Args: | |
current_text: The ``AttackedText`` to transform. | |
pre_transformation_constraints: The ``PreTransformationConstraint`` to apply before | |
beginning the transformation. | |
indices_to_modify: Which word indices should be modified as dictated by the | |
``SearchMethod``. | |
shifted_idxs (bool): Whether indices have been shifted from | |
their original position in the text. | |
""" | |
if indices_to_modify is None: | |
indices_to_modify = set(range(len(current_text.words) - 1)) | |
else: | |
indices_to_modify = set(indices_to_modify) | |
if shifted_idxs: | |
indices_to_modify = set( | |
current_text.convert_from_original_idxs(indices_to_modify) | |
) | |
for constraint in pre_transformation_constraints: | |
allowed_indices = constraint(current_text, self) | |
for i in indices_to_modify: | |
if i not in allowed_indices and i + 1 not in allowed_indices: | |
indices_to_modify.remove(i) | |
transformed_texts = self._get_transformations(current_text, indices_to_modify) | |
for text in transformed_texts: | |
text.attack_attrs["last_transformation"] = self | |
if len(text.attack_attrs["newly_modified_indices"]) == 0: | |
print("xcv", text, len(text.attack_attrs["newly_modified_indices"])) | |
return transformed_texts | |
def _get_new_words(self, current_text, index): | |
"""Returns a set of new words we can insert at position `index` of `current_text` | |
Args: | |
current_text (AttackedText): Current text to modify. | |
index (int): Position in which to insert a new word | |
Returns: | |
list[str]: List of new words to insert. | |
""" | |
raise NotImplementedError() | |
def _get_transformations(self, current_text, indices_to_modify): | |
""" | |
Return a set of transformed texts obtained by insertion a new word in `indices_to_modify` | |
Args: | |
current_text (AttackedText): Current text to modify. | |
indices_to_modify (list[int]): List of positions in which to insert a new word. | |
Returns: | |
list[AttackedText]: List of transformed texts | |
""" | |
transformed_texts = [] | |
for i in indices_to_modify: | |
new_words = self._get_new_words(current_text, i) | |
for w in new_words: | |
temp_text = current_text.replace_word_at_index(i, w) | |
transformed_texts.append(temp_text.delete_word_at_index(i + 1)) | |
return transformed_texts | |