|
""" fontTools.misc.classifyTools.py -- tools for classifying things. |
|
""" |
|
|
|
|
|
class Classifier(object): |
|
""" |
|
Main Classifier object, used to classify things into similar sets. |
|
""" |
|
|
|
def __init__(self, sort=True): |
|
self._things = set() |
|
self._sets = [] |
|
self._mapping = {} |
|
self._dirty = False |
|
self._sort = sort |
|
|
|
def add(self, set_of_things): |
|
""" |
|
Add a set to the classifier. Any iterable is accepted. |
|
""" |
|
if not set_of_things: |
|
return |
|
|
|
self._dirty = True |
|
|
|
things, sets, mapping = self._things, self._sets, self._mapping |
|
|
|
s = set(set_of_things) |
|
intersection = s.intersection(things) |
|
s.difference_update(intersection) |
|
difference = s |
|
del s |
|
|
|
|
|
if difference: |
|
things.update(difference) |
|
sets.append(difference) |
|
for thing in difference: |
|
mapping[thing] = difference |
|
del difference |
|
|
|
while intersection: |
|
|
|
old_class = mapping[next(iter(intersection))] |
|
old_class_intersection = old_class.intersection(intersection) |
|
|
|
|
|
old_class.difference_update(old_class_intersection) |
|
|
|
|
|
intersection.difference_update(old_class_intersection) |
|
|
|
|
|
sets.append(old_class_intersection) |
|
for thing in old_class_intersection: |
|
mapping[thing] = old_class_intersection |
|
del old_class_intersection |
|
|
|
def update(self, list_of_sets): |
|
""" |
|
Add a a list of sets to the classifier. Any iterable of iterables is accepted. |
|
""" |
|
for s in list_of_sets: |
|
self.add(s) |
|
|
|
def _process(self): |
|
if not self._dirty: |
|
return |
|
|
|
|
|
sets = self._sets |
|
self._sets = [s for s in sets if s] |
|
|
|
if self._sort: |
|
self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s))) |
|
|
|
self._dirty = False |
|
|
|
|
|
|
|
def getThings(self): |
|
"""Returns the set of all things known so far. |
|
|
|
The return value belongs to the Classifier object and should NOT |
|
be modified while the classifier is still in use. |
|
""" |
|
self._process() |
|
return self._things |
|
|
|
def getMapping(self): |
|
"""Returns the mapping from things to their class set. |
|
|
|
The return value belongs to the Classifier object and should NOT |
|
be modified while the classifier is still in use. |
|
""" |
|
self._process() |
|
return self._mapping |
|
|
|
def getClasses(self): |
|
"""Returns the list of class sets. |
|
|
|
The return value belongs to the Classifier object and should NOT |
|
be modified while the classifier is still in use. |
|
""" |
|
self._process() |
|
return self._sets |
|
|
|
|
|
def classify(list_of_sets, sort=True): |
|
""" |
|
Takes a iterable of iterables (list of sets from here on; but any |
|
iterable works.), and returns the smallest list of sets such that |
|
each set, is either a subset, or is disjoint from, each of the input |
|
sets. |
|
|
|
In other words, this function classifies all the things present in |
|
any of the input sets, into similar classes, based on which sets |
|
things are a member of. |
|
|
|
If sort=True, return class sets are sorted by decreasing size and |
|
their natural sort order within each class size. Otherwise, class |
|
sets are returned in the order that they were identified, which is |
|
generally not significant. |
|
|
|
>>> classify([]) == ([], {}) |
|
True |
|
>>> classify([[]]) == ([], {}) |
|
True |
|
>>> classify([[], []]) == ([], {}) |
|
True |
|
>>> classify([[1]]) == ([{1}], {1: {1}}) |
|
True |
|
>>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}}) |
|
True |
|
>>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) |
|
True |
|
>>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) |
|
True |
|
>>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}}) |
|
True |
|
>>> classify([[1,2],[2,4,5]]) == ( |
|
... [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) |
|
True |
|
>>> classify([[1,2],[2,4,5]], sort=False) == ( |
|
... [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) |
|
True |
|
>>> classify([[1,2,9],[2,4,5]], sort=False) == ( |
|
... [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5}, |
|
... 9: {1, 9}}) |
|
True |
|
>>> classify([[1,2,9,15],[2,4,5]], sort=False) == ( |
|
... [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5}, |
|
... 5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}}) |
|
True |
|
>>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False) |
|
>>> set([frozenset(c) for c in classes]) == set( |
|
... [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})]) |
|
True |
|
>>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}} |
|
True |
|
""" |
|
classifier = Classifier(sort=sort) |
|
classifier.update(list_of_sets) |
|
return classifier.getClasses(), classifier.getMapping() |
|
|
|
|
|
if __name__ == "__main__": |
|
import sys, doctest |
|
|
|
sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed) |
|
|