Spaces:
Sleeping
Sleeping
File size: 6,089 Bytes
6a86ad5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
from itertools import zip_longest
from sympy.utilities.enumerative import (
list_visitor,
MultisetPartitionTraverser,
multiset_partitions_taocp
)
from sympy.utilities.iterables import _set_partitions
# first some functions only useful as test scaffolding - these provide
# straightforward, but slow reference implementations against which to
# compare the real versions, and also a comparison to verify that
# different versions are giving identical results.
def part_range_filter(partition_iterator, lb, ub):
"""
Filters (on the number of parts) a multiset partition enumeration
Arguments
=========
lb, and ub are a range (in the Python slice sense) on the lpart
variable returned from a multiset partition enumeration. Recall
that lpart is 0-based (it points to the topmost part on the part
stack), so if you want to return parts of sizes 2,3,4,5 you would
use lb=1 and ub=5.
"""
for state in partition_iterator:
f, lpart, pstack = state
if lpart >= lb and lpart < ub:
yield state
def multiset_partitions_baseline(multiplicities, components):
"""Enumerates partitions of a multiset
Parameters
==========
multiplicities
list of integer multiplicities of the components of the multiset.
components
the components (elements) themselves
Returns
=======
Set of partitions. Each partition is tuple of parts, and each
part is a tuple of components (with repeats to indicate
multiplicity)
Notes
=====
Multiset partitions can be created as equivalence classes of set
partitions, and this function does just that. This approach is
slow and memory intensive compared to the more advanced algorithms
available, but the code is simple and easy to understand. Hence
this routine is strictly for testing -- to provide a
straightforward baseline against which to regress the production
versions. (This code is a simplified version of an earlier
production implementation.)
"""
canon = [] # list of components with repeats
for ct, elem in zip(multiplicities, components):
canon.extend([elem]*ct)
# accumulate the multiset partitions in a set to eliminate dups
cache = set()
n = len(canon)
for nc, q in _set_partitions(n):
rv = [[] for i in range(nc)]
for i in range(n):
rv[q[i]].append(canon[i])
canonical = tuple(
sorted([tuple(p) for p in rv]))
cache.add(canonical)
return cache
def compare_multiset_w_baseline(multiplicities):
"""
Enumerates the partitions of multiset with AOCP algorithm and
baseline implementation, and compare the results.
"""
letters = "abcdefghijklmnopqrstuvwxyz"
bl_partitions = multiset_partitions_baseline(multiplicities, letters)
# The partitions returned by the different algorithms may have
# their parts in different orders. Also, they generate partitions
# in different orders. Hence the sorting, and set comparison.
aocp_partitions = set()
for state in multiset_partitions_taocp(multiplicities):
p1 = tuple(sorted(
[tuple(p) for p in list_visitor(state, letters)]))
aocp_partitions.add(p1)
assert bl_partitions == aocp_partitions
def compare_multiset_states(s1, s2):
"""compare for equality two instances of multiset partition states
This is useful for comparing different versions of the algorithm
to verify correctness."""
# Comparison is physical, the only use of semantics is to ignore
# trash off the top of the stack.
f1, lpart1, pstack1 = s1
f2, lpart2, pstack2 = s2
if (lpart1 == lpart2) and (f1[0:lpart1+1] == f2[0:lpart2+1]):
if pstack1[0:f1[lpart1+1]] == pstack2[0:f2[lpart2+1]]:
return True
return False
def test_multiset_partitions_taocp():
"""Compares the output of multiset_partitions_taocp with a baseline
(set partition based) implementation."""
# Test cases should not be too large, since the baseline
# implementation is fairly slow.
multiplicities = [2,2]
compare_multiset_w_baseline(multiplicities)
multiplicities = [4,3,1]
compare_multiset_w_baseline(multiplicities)
def test_multiset_partitions_versions():
"""Compares Knuth-based versions of multiset_partitions"""
multiplicities = [5,2,2,1]
m = MultisetPartitionTraverser()
for s1, s2 in zip_longest(m.enum_all(multiplicities),
multiset_partitions_taocp(multiplicities)):
assert compare_multiset_states(s1, s2)
def subrange_exercise(mult, lb, ub):
"""Compare filter-based and more optimized subrange implementations
Helper for tests, called with both small and larger multisets.
"""
m = MultisetPartitionTraverser()
assert m.count_partitions(mult) == \
m.count_partitions_slow(mult)
# Note - multiple traversals from the same
# MultisetPartitionTraverser object cannot execute at the same
# time, hence make several instances here.
ma = MultisetPartitionTraverser()
mc = MultisetPartitionTraverser()
md = MultisetPartitionTraverser()
# Several paths to compute just the size two partitions
a_it = ma.enum_range(mult, lb, ub)
b_it = part_range_filter(multiset_partitions_taocp(mult), lb, ub)
c_it = part_range_filter(mc.enum_small(mult, ub), lb, sum(mult))
d_it = part_range_filter(md.enum_large(mult, lb), 0, ub)
for sa, sb, sc, sd in zip_longest(a_it, b_it, c_it, d_it):
assert compare_multiset_states(sa, sb)
assert compare_multiset_states(sa, sc)
assert compare_multiset_states(sa, sd)
def test_subrange():
# Quick, but doesn't hit some of the corner cases
mult = [4,4,2,1] # mississippi
lb = 1
ub = 2
subrange_exercise(mult, lb, ub)
def test_subrange_large():
# takes a second or so, depending on cpu, Python version, etc.
mult = [6,3,2,1]
lb = 4
ub = 7
subrange_exercise(mult, lb, ub)
|