File size: 3,954 Bytes
7885a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""Tests for _sketches.py."""

import numpy as np
from numpy.testing import assert_, assert_equal
from scipy.linalg import clarkson_woodruff_transform
from scipy.linalg._sketches import cwt_matrix
from scipy.sparse import issparse, rand
from scipy.sparse.linalg import norm


class TestClarksonWoodruffTransform:
    """
    Testing the Clarkson Woodruff Transform
    """
    # set seed for generating test matrices
    rng = np.random.default_rng(1179103485)

    # Test matrix parameters
    n_rows = 2000
    n_cols = 100
    density = 0.1

    # Sketch matrix dimensions
    n_sketch_rows = 200

    # Seeds to test with
    seeds = [1755490010, 934377150, 1391612830, 1752708722, 2008891431,
             1302443994, 1521083269, 1501189312, 1126232505, 1533465685]

    A_dense = rng.random((n_rows, n_cols))
    A_csc = rand(
        n_rows, n_cols, density=density, format='csc', random_state=rng,
    )
    A_csr = rand(
        n_rows, n_cols, density=density, format='csr', random_state=rng,
    )
    A_coo = rand(
        n_rows, n_cols, density=density, format='coo', random_state=rng,
    )

    # Collect the test matrices
    test_matrices = [
        A_dense, A_csc, A_csr, A_coo,
    ]

    # Test vector with norm ~1
    x = rng.random((n_rows, 1)) / np.sqrt(n_rows)

    def test_sketch_dimensions(self):
        for A in self.test_matrices:
            for seed in self.seeds:
                # seed to ensure backwards compatibility post SPEC7
                sketch = clarkson_woodruff_transform(
                    A, self.n_sketch_rows, seed=seed
                )
                assert_(sketch.shape == (self.n_sketch_rows, self.n_cols))

    def test_seed_returns_identical_transform_matrix(self):
        for seed in self.seeds:
            S1 = cwt_matrix(
                self.n_sketch_rows, self.n_rows, rng=seed
            ).toarray()
            S2 = cwt_matrix(
                self.n_sketch_rows, self.n_rows, rng=seed
            ).toarray()
            assert_equal(S1, S2)

    def test_seed_returns_identically(self):
        for A in self.test_matrices:
            for seed in self.seeds:
                sketch1 = clarkson_woodruff_transform(
                    A, self.n_sketch_rows, rng=seed
                )
                sketch2 = clarkson_woodruff_transform(
                    A, self.n_sketch_rows, rng=seed
                )
                if issparse(sketch1):
                    sketch1 = sketch1.toarray()
                if issparse(sketch2):
                    sketch2 = sketch2.toarray()
                assert_equal(sketch1, sketch2)

    def test_sketch_preserves_frobenius_norm(self):
        # Given the probabilistic nature of the sketches
        # we run the test multiple times and check that
        # we pass all/almost all the tries.
        n_errors = 0
        for A in self.test_matrices:
            if issparse(A):
                true_norm = norm(A)
            else:
                true_norm = np.linalg.norm(A)
            for seed in self.seeds:
                sketch = clarkson_woodruff_transform(
                    A, self.n_sketch_rows, rng=seed,
                )
                if issparse(sketch):
                    sketch_norm = norm(sketch)
                else:
                    sketch_norm = np.linalg.norm(sketch)

                if np.abs(true_norm - sketch_norm) > 0.1 * true_norm:
                    n_errors += 1
        assert_(n_errors == 0)

    def test_sketch_preserves_vector_norm(self):
        n_errors = 0
        n_sketch_rows = int(np.ceil(2. / (0.01 * 0.5**2)))
        true_norm = np.linalg.norm(self.x)
        for seed in self.seeds:
            sketch = clarkson_woodruff_transform(
                self.x, n_sketch_rows, rng=seed,
            )
            sketch_norm = np.linalg.norm(sketch)

            if np.abs(true_norm - sketch_norm) > 0.5 * true_norm:
                n_errors += 1
        assert_(n_errors == 0)