File size: 24,062 Bytes

7885a28

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import warnings

import numpy as np
import pytest

from sklearn.cluster import DBSCAN, OPTICS
from sklearn.cluster._optics import _extend_region, _extract_xi_labels
from sklearn.cluster.tests.common import generate_clustered_data
from sklearn.datasets import make_blobs
from sklearn.exceptions import DataConversionWarning, EfficiencyWarning
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import CSR_CONTAINERS

# Shared module-level fixture: six 2-D normally distributed blobs of 10 points
# each (different centers and spreads), stacked into one (60, 2) array ``X``.
# NOTE: every draw comes from the same seeded RandomState, so the order of the
# statements below determines the data. Tests in this module compare fits on
# ``X`` against hard-coded expected values, so this block must not be reordered.
rng = np.random.RandomState(0)
n_points_per_cluster = 10
C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)
C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)
C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, C6))


@pytest.mark.parametrize(
    ("r_plot", "end"),
    [
        [[10, 8.9, 8.8, 8.7, 7, 10], 3],
        [[10, 8.9, 8.8, 8.7, 8.6, 7, 10], 0],
        [[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
    ],
)
def test_extend_downward(r_plot, end):
    """Check the end index `_extend_region` returns for a steep downward region.

    Parameters
    ----------
    r_plot : list of float
        Synthetic reachability values.
    end : int
        Expected return value of `_extend_region` when started at index 0.
    """
    r_plot = np.array(r_plot)
    # ratio[i] compares each reachability value with its successor.
    ratio = r_plot[:-1] / r_plot[1:]
    # True where the successor is at most 0.9 times the current value.
    steep_downward = ratio >= 1 / 0.9
    # True where the successor is strictly larger than the current value.
    upward = ratio < 1

    # Start at index 0; the trailing 2 is presumably `min_samples`
    # (confirm against the `_extend_region` signature).
    e = _extend_region(steep_downward, upward, 0, 2)
    assert e == end


@pytest.mark.parametrize(
    ("r_plot", "end"),
    [
        [[1, 2, 2.1, 2.2, 4, 8, 8, np.inf], 6],
        [[1, 2, 2.1, 2.2, 2.3, 4, 8, 8, np.inf], 0],
        [[1, 2, 2.1, 2, np.inf], 0],
        [[1, 2, 2.1, np.inf], 2],
    ],
)
def test_extend_upward(r_plot, end):
    """Check the end index `_extend_region` returns for a steep upward region.

    `r_plot` holds synthetic reachability values and `end` is the value
    `_extend_region` is expected to return when started at index 0.
    """
    reachability = np.array(r_plot)
    current, successor = reachability[:-1], reachability[1:]
    ratio = current / successor

    # True where the current value is at most 0.9 times its successor.
    is_steep_upward = ratio <= 0.9
    # True where the successor is strictly smaller than the current value.
    is_downward = ratio > 1

    assert _extend_region(is_steep_upward, is_downward, 0, 2) == end


@pytest.mark.parametrize(
    ("ordering", "clusters", "expected"),
    [
        [[0, 1, 2, 3], [[0, 1], [2, 3]], [0, 0, 1, 1]],
        [[0, 1, 2, 3], [[0, 1], [3, 3]], [0, 0, -1, 1]],
        [[0, 1, 2, 3], [[0, 1], [3, 3], [0, 3]], [0, 0, -1, 1]],
        [[3, 1, 2, 0], [[0, 1], [3, 3], [0, 3]], [1, 0, -1, 0]],
    ],
)
def test_the_extract_xi_labels(ordering, clusters, expected):
    """Check `_extract_xi_labels` on hand-written orderings and clusters."""
    assert_array_equal(_extract_xi_labels(ordering, clusters), expected)


def test_extract_xi(global_dtype):
    """Check xi-based label extraction on small, well separated data.

    Three scenarios are fitted in turn: random blobs plus one far-away point,
    the same blobs plus two far-away points, and three hand-written clusters.
    """
    # small and easy test (no clusters around other clusters)
    # but with a clear noise data.
    rng = np.random.RandomState(0)
    n_per_blob = 5
    blob_specs = [
        ([-5, -2], 0.8),
        ([4, -1], 0.1),
        ([1, -2], 0.2),
        ([-2, 3], 0.3),
        ([3, -2], 0.6),
        ([5, 6], 0.2),
    ]
    # The blobs are drawn one after the other from the same generator, so the
    # order of `blob_specs` fixes the data.
    blobs = [center + scale * rng.randn(n_per_blob, 2) for center, scale in blob_specs]
    *leading_blobs, last_blob = blobs
    far_point = [100, 100]

    def xi_labels(data, **params):
        # Fit OPTICS with the xi extraction method and return the labels.
        return OPTICS(cluster_method="xi", **params).fit(data).labels_

    # --- blobs with a single far-away point ---
    X = np.vstack((*leading_blobs, np.array([far_point]), last_blob))
    X = X.astype(global_dtype, copy=False)
    expected_labels = np.r_[np.repeat([2, 0, 1, 3, 1], 5), -1, np.repeat(4, 5)]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    assert_array_equal(
        xi_labels(X, min_samples=3, min_cluster_size=2, max_eps=20, xi=0.4),
        expected_labels,
    )

    # check float min_samples and min_cluster_size
    assert_array_equal(
        xi_labels(X, min_samples=0.1, min_cluster_size=0.08, max_eps=20, xi=0.4),
        expected_labels,
    )

    # --- blobs with two far-away points ---
    X = np.vstack((*leading_blobs, np.array([far_point] * 2), last_blob))
    X = X.astype(global_dtype, copy=False)
    expected_labels = np.r_[np.repeat([1, 3, 2, 0, 2], 5), -1, -1, np.repeat(4, 5)]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    # this may fail if the predecessor correction is not at work!
    assert_array_equal(
        xi_labels(X, min_samples=3, min_cluster_size=3, max_eps=20, xi=0.3),
        expected_labels,
    )

    # --- three hand-written clusters ---
    hand_written = (
        [[0, 0], [0, 0.1], [0, -0.1], [0.1, 0]],
        [[10, 10], [10, 9], [10, 11], [9, 10]],
        [[100, 100], [100, 90], [100, 110], [90, 100]],
    )
    X = np.vstack(hand_written).astype(global_dtype, copy=False)
    expected_labels = np.repeat([0, 1, 2], 4)
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    assert_array_equal(
        xi_labels(X, min_samples=2, min_cluster_size=2, max_eps=np.inf, xi=0.04),
        expected_labels,
    )


def test_cluster_hierarchy_(global_dtype):
    """Check `cluster_hierarchy_` on a tight blob nested inside a wide one."""
    rng = np.random.RandomState(0)
    n_per_blob = 100
    # Both blobs share the same center and differ only in spread; they are
    # drawn in this order from the same generator.
    tight = [0, 0] + 2 * rng.randn(n_per_blob, 2).astype(global_dtype, copy=False)
    wide = [0, 0] + 50 * rng.randn(n_per_blob, 2).astype(global_dtype, copy=False)
    data = shuffle(np.vstack((tight, wide)), random_state=0)

    hierarchy = OPTICS(min_samples=20, xi=0.1).fit(data).cluster_hierarchy_
    assert hierarchy.shape == (2, 2)

    reference = np.array([[0, 99], [0, 199]])
    # NOTE(review): this is a signed sum, so deviations of opposite sign
    # cancel each other out; kept as is to preserve the existing check.
    deviation = np.sum(hierarchy - reference)
    assert deviation / len(data) < 0.05


@pytest.mark.parametrize(
    "csr_container, metric",
    [(None, "minkowski")] + [(container, "euclidean") for container in CSR_CONTAINERS],
)
def test_correct_number_of_clusters(metric, csr_container):
    """Check the number of clusters found and the fitted attributes' layout."""
    # in 'auto' mode
    expected_n_clusters = 3
    X = generate_clustered_data(n_clusters=expected_n_clusters)
    n_samples = len(X)
    fit_input = X if csr_container is None else csr_container(X)

    # Parameters chosen specifically for this task.
    clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=0.1, metric=metric)
    clust.fit(fit_input)

    # number of clusters, ignoring noise if present
    labels = clust.labels_
    assert np.unique(labels[labels != -1]).size == expected_n_clusters

    # check attribute types and sizes
    expected_kinds = [
        ("labels_", "i"),
        ("reachability_", "f"),
        ("core_distances_", "f"),
        ("ordering_", "i"),
    ]
    for attribute, kind in expected_kinds:
        values = getattr(clust, attribute)
        assert values.shape == (n_samples,)
        assert values.dtype.kind == kind

    # the ordering must contain every sample index
    assert set(clust.ordering_) == set(range(n_samples))


def test_minimum_number_of_sample_check():
    """Check that fitting fails when `min_samples` exceeds the sample count."""
    single_point = [[1, 1]]
    estimator = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1.0)

    # One sample is available but ten are requested, so `fit` must raise.
    with pytest.raises(ValueError, match="min_samples must be no greater than"):
        estimator.fit(single_point)


def test_bad_extract():
    """Check that an extraction `eps` larger than `max_eps` is rejected."""
    # Test an extraction of eps too close to original eps
    # `match` is applied as a regular expression, so the dots are escaped to
    # match the literal message rather than any character.
    msg = r"Specify an epsilon smaller than 0\.15\. Got 0\.3\."
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, _ = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=0)

    # Compute OPTICS: max_eps (5.0 * 0.03) is below the requested eps (0.3).
    clust = OPTICS(max_eps=5.0 * 0.03, cluster_method="dbscan", eps=0.3, min_samples=10)
    with pytest.raises(ValueError, match=msg):
        clust.fit(X)


def test_bad_reachability():
    """Check the warning raised when every reachability value is inf."""
    # `match` is a regular expression searched in the warning message: the
    # literal dot is escaped and the pattern stops after "max_eps" so that no
    # stray wildcard is left at the end.
    msg = r"All reachability values are inf\. Set a larger max_eps"
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, _ = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=0)

    # A very small max_eps (5.0 * 0.003) is used to trigger the warning.
    with pytest.warns(UserWarning, match=msg):
        clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
        clust.fit(X)


def test_nowarn_if_metric_bool_data_bool():
    # make sure no warning is raised if metric and data are both boolean
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/18996

    pairwise_metric = "rogerstanimoto"
    # Seeded generator so that the input is identical on every run.
    rng = np.random.RandomState(0)
    X = rng.randint(2, size=(5, 2), dtype=bool)

    with warnings.catch_warnings():
        # Turn any DataConversionWarning into an error so the test fails on it.
        warnings.simplefilter("error", DataConversionWarning)

        OPTICS(metric=pairwise_metric).fit(X)


def test_warn_if_metric_bool_data_no_bool():
    # make sure a *single* conversion warning is raised if metric is boolean
    # but data isn't
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/18996

    pairwise_metric = "rogerstanimoto"
    # Seeded generator so that the input is identical on every run.
    rng = np.random.RandomState(0)
    X = rng.randint(2, size=(5, 2), dtype=np.int32)
    msg = f"Data will be converted to boolean for metric {pairwise_metric}"

    with pytest.warns(DataConversionWarning, match=msg) as warn_record:
        OPTICS(metric=pairwise_metric).fit(X)

    # Checked once the block has finished, i.e. after all warnings were recorded.
    assert len(warn_record) == 1


def test_nowarn_if_metric_no_bool():
    # make sure no conversion warning is raised if
    # metric isn't boolean, no matter what the data type is
    pairwise_metric = "minkowski"
    # Seeded generator so that both inputs are identical on every run.
    rng = np.random.RandomState(0)
    X_bool = rng.randint(2, size=(5, 2), dtype=bool)
    X_num = rng.randint(2, size=(5, 2), dtype=np.int32)

    with warnings.catch_warnings():
        # Turn any DataConversionWarning into an error so the test fails on it.
        warnings.simplefilter("error", DataConversionWarning)

        # fit boolean data
        OPTICS(metric=pairwise_metric).fit(X_bool)
        # fit numeric data
        OPTICS(metric=pairwise_metric).fit(X_num)


def test_close_extract():
    """Check DBSCAN-style extraction when `eps` is below `max_eps`."""
    # Test extract where extraction eps is close to scaled max_eps
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    X, _ = make_blobs(
        n_samples=750, centers=blob_centers, cluster_std=0.4, random_state=0
    )

    fitted = OPTICS(max_eps=1.0, cluster_method="dbscan", eps=0.3, min_samples=10)
    fitted.fit(X)

    # Labels start at 0, so a maximum label of 2 means three clusters.
    assert fitted.labels_.max() == 2


@pytest.mark.parametrize("eps", [0.1, 0.3, 0.5])
@pytest.mark.parametrize("min_samples", [3, 10, 20])
@pytest.mark.parametrize(
    "csr_container, metric",
    [(None, "minkowski"), (None, "euclidean")]
    + [(container, "euclidean") for container in CSR_CONTAINERS],
)
def test_dbscan_optics_parity(eps, min_samples, metric, global_dtype, csr_container):
    """Check that OPTICS with DBSCAN extraction stays close to DBSCAN itself."""
    # Test that OPTICS clustering labels are <= 5% difference of DBSCAN
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    X, _ = make_blobs(
        n_samples=150, centers=blob_centers, cluster_std=0.4, random_state=0
    )
    if csr_container is not None:
        X = csr_container(X)
    X = X.astype(global_dtype, copy=False)
    n_samples = X.shape[0]

    # OPTICS with DBSCAN-style extraction at the parametrized eps
    optics = OPTICS(
        min_samples=min_samples, cluster_method="dbscan", eps=eps, metric=metric
    ).fit(X)

    # reference DBSCAN labels for the same eps / min_samples
    dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(X)

    # Agreement is the smaller of the two "best match" totals of the
    # contingency table (column-wise and row-wise maxima).
    table = contingency_matrix(dbscan.labels_, optics.labels_)
    n_agree = min(table.max(axis=0).sum(), table.max(axis=1).sum())
    n_disagree = n_samples - n_agree

    # One sample is subtracted from the disagreement count before rounding.
    mismatch_fraction = np.round((n_disagree - 1) / n_samples, 2)

    # verify label mismatch is <= 5% labels
    assert mismatch_fraction <= 0.05


def test_min_samples_edge_case(global_dtype):
    """Check xi labels for three 3-point groups as `max_eps`/`min_samples` vary."""
    tight = [[0, 0], [0, 0.1], [0, -0.1]]
    medium = [[10, 10], [10, 9], [10, 11]]
    loose = [[100, 100], [100, 96], [100, 106]]
    X = np.vstack((tight, medium, loose)).astype(global_dtype, copy=False)
    xi_options = {"cluster_method": "xi", "xi": 0.04}

    # max_eps=7: each group gets its own label.
    labels = OPTICS(min_samples=3, max_eps=7, **xi_options).fit(X).labels_
    assert_array_equal(labels, np.repeat([0, 1, 2], 3))

    # max_eps=3: the last group is expected to be labelled -1.
    labels = OPTICS(min_samples=3, max_eps=3, **xi_options).fit(X).labels_
    assert_array_equal(labels, np.repeat([0, 1, -1], 3))

    # min_samples=4: a warning is expected and every sample is labelled -1.
    with pytest.warns(UserWarning, match="All reachability values"):
        labels = OPTICS(min_samples=4, max_eps=3, **xi_options).fit(X).labels_
        assert_array_equal(labels, np.full(9, -1))


# try arbitrary minimum sizes
# NOTE(review): with the 60-row module-level ``X`` this is ``range(2, 6, 23)``,
# so only ``min_cluster_size=2`` is actually exercised.
@pytest.mark.parametrize("min_cluster_size", range(2, X.shape[0] // 10, 23))
def test_min_cluster_size(min_cluster_size, global_dtype):
    """Check that `min_cluster_size` is honoured, as a count and as a fraction."""
    subsample = X[::2].astype(global_dtype, copy=False)  # every other row, for speed
    n_subsample = subsample.shape[0]

    by_count = OPTICS(min_samples=9, min_cluster_size=min_cluster_size)
    by_count.fit(subsample)
    # Sizes of the clusters found, with the -1 label left out.
    sizes = np.bincount(by_count.labels_[by_count.labels_ != -1])
    if sizes.size:
        assert min(sizes) >= min_cluster_size

    # check behaviour is the same when min_cluster_size is a fraction
    by_fraction = OPTICS(
        min_samples=9,
        min_cluster_size=min_cluster_size / n_subsample,
    )
    by_fraction.fit(subsample)
    assert_array_equal(by_count.labels_, by_fraction.labels_)


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_min_cluster_size_invalid2(csr_container):
    """Check that a `min_cluster_size` above the sample count is rejected."""
    too_large = len(X) + 1
    error_pattern = "must be no greater than the "

    # dense input
    dense_estimator = OPTICS(min_cluster_size=too_large)
    with pytest.raises(ValueError, match=error_pattern):
        dense_estimator.fit(X)

    # sparse input
    sparse_estimator = OPTICS(min_cluster_size=too_large, metric="euclidean")
    with pytest.raises(ValueError, match=error_pattern):
        sparse_estimator.fit(csr_container(X))


def test_processing_order():
    """Check reachability, core distances and ordering on four 1-D points."""
    # Ensure that we consider all unprocessed points, not only direct
    # neighbors, when picking the next point.
    points = [[0], [10], [-10], [25]]
    fitted = OPTICS(min_samples=3, max_eps=15).fit(points)

    expected_attributes = {
        "reachability_": [np.inf, 10, 10, 15],
        "core_distances_": [10, 15, np.inf, np.inf],
        "ordering_": [0, 1, 2, 3],
    }
    for attribute, expected in expected_attributes.items():
        assert_array_equal(getattr(fitted, attribute), expected)


def test_compare_to_ELKI():
    """Compare ordering, predecessors and reachability with ELKI reference output.

    Two fits on the module-level ``X`` are checked: one with the default
    ``max_eps`` and one with ``max_eps=0.5``.
    """
    # Expected values, computed with (future) ELKI 0.7.5 using:
    # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
    #   -algorithm clustering.optics.OPTICSHeap -optics.minpts 5
    # where the FixedDBIDsFilter gives 0-indexed ids.
    # fmt: off
    reach_full = [
        np.inf, 1.0574896366427478, 0.7587934993548423,
        0.7290174038973836, 0.7290174038973836, 0.7290174038973836,
        0.6861627576116127, 0.7587934993548423, 0.9280118450166668,
        1.1748022534146194, 3.3355455741292257, 0.49618389254482587,
        0.2552805046961355, 0.2552805046961355, 0.24944622248445714,
        0.24944622248445714, 0.24944622248445714, 0.2552805046961355,
        0.2552805046961355, 0.3086779122185853, 4.163024452756142,
        1.623152630340929, 0.45315840475822655, 0.25468325192031926,
        0.2254004358159971, 0.18765711877083036, 0.1821471333893275,
        0.1821471333893275, 0.18765711877083036, 0.18765711877083036,
        0.2240202988740153, 1.154337614548715, 1.342604473837069,
        1.323308536402633, 0.8607514948648837, 0.27219111215810565,
        0.13260875220533205, 0.13260875220533205, 0.09890587675958984,
        0.09890587675958984, 0.13548790801634494, 0.1575483940837384,
        0.17515137170530226, 0.17575920159442388, 0.27219111215810565,
        0.6101447895405373, 1.3189208094864302, 1.323308536402633,
        2.2509184159764577, 2.4517810628594527, 3.675977064404973,
        3.8264795626020365, 2.9130735341510614, 2.9130735341510614,
        2.9130735341510614, 2.9130735341510614, 2.8459300127258036,
        2.8459300127258036, 2.8459300127258036, 3.0321982337972537,
    ]
    order_full = [
        0, 3, 6, 4, 7, 8, 2, 9, 5, 1,
        31, 30, 32, 34, 33, 38, 39, 35, 37, 36,
        44, 21, 23, 24, 22, 25, 27, 29, 26, 28,
        20, 40, 45, 46, 10, 15, 11, 13, 17, 19,
        18, 12, 16, 14, 47, 49, 43, 48, 42, 41,
        53, 57, 51, 52, 56, 59, 54, 55, 58, 50,
    ]
    pred_full = [
        -1, 0, 3, 6, 6, 6, 8, 3, 7, 5,
        1, 31, 30, 30, 34, 34, 34, 32, 32, 37,
        36, 44, 21, 23, 24, 22, 25, 25, 22, 22,
        22, 21, 40, 45, 46, 10, 15, 15, 13, 13,
        15, 11, 19, 15, 10, 47, 12, 45, 14, 43,
        42, 53, 57, 57, 57, 57, 59, 59, 59, 58,
    ]
    # fmt: on

    # Tests against known extraction array
    # Does NOT work with metric='euclidean', because sklearn euclidean has
    # worse numeric precision. 'minkowski' is slower but more accurate.
    optics_full = OPTICS(min_samples=5).fit(X)
    full_ordering = optics_full.ordering_

    assert_array_equal(full_ordering, np.array(order_full))
    assert_array_equal(optics_full.predecessor_[full_ordering], np.array(pred_full))
    assert_allclose(optics_full.reachability_[full_ordering], np.array(reach_full))

    # ELKI currently does not print the core distances (which are not used much
    # in literature), but we can at least ensure this consistency: every point
    # after the first has a reachability no smaller than the core distance of
    # its predecessor.
    later_points = full_ordering[1:]
    predecessor_core = optics_full.core_distances_[
        optics_full.predecessor_[later_points]
    ]
    assert np.all(optics_full.reachability_[later_points] >= predecessor_core)

    # Expected values, computed with (future) ELKI 0.7.5
    # (presumably the same setup with epsilon capped at 0.5; the command was
    # not recorded here).
    # fmt: off
    reach_capped = [
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, 0.27219111215810565,
        0.13260875220533205, 0.13260875220533205, 0.09890587675958984,
        0.09890587675958984, 0.13548790801634494, 0.1575483940837384,
        0.17515137170530226, 0.17575920159442388, 0.27219111215810565,
        0.4928068613197889, np.inf, 0.2666183922512113,
        0.18765711877083036, 0.1821471333893275, 0.1821471333893275,
        0.1821471333893275, 0.18715928772277457, 0.18765711877083036,
        0.18765711877083036, 0.25468325192031926, np.inf,
        0.2552805046961355, 0.2552805046961355, 0.24944622248445714,
        0.24944622248445714, 0.24944622248445714, 0.2552805046961355,
        0.2552805046961355, 0.3086779122185853, 0.34466409325984865,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
        np.inf, np.inf, np.inf,
    ]
    order_capped = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
        10, 15, 11, 13, 17, 19, 18, 12, 16, 14,
        47, 46, 20, 22, 25, 23, 27, 29, 24, 26,
        28, 21, 30, 32, 34, 33, 38, 39, 35, 37,
        36, 31, 40, 41, 42, 43, 44, 45, 48, 49,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    ]
    pred_capped = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, 10, 15, 15, 13, 13, 15, 11, 19, 15,
        10, 47, -1, 20, 22, 25, 25, 25, 25, 22,
        22, 23, -1, 30, 30, 34, 34, 34, 32, 32,
        37, 38, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    ]
    # fmt: on
    optics_capped = OPTICS(min_samples=5, max_eps=0.5).fit(X)
    capped_ordering = optics_capped.ordering_

    assert_array_equal(capped_ordering, np.array(order_capped))
    assert_array_equal(
        optics_capped.predecessor_[capped_ordering], np.array(pred_capped)
    )
    assert_allclose(
        optics_capped.reachability_[capped_ordering], np.array(reach_capped)
    )

    # Core distances not above the 0.5 cap must agree between both fits.
    within_cap = np.where(optics_full.core_distances_ <= 0.5)[0]
    assert_allclose(
        optics_full.core_distances_[within_cap],
        optics_capped.core_distances_[within_cap],
    )


def test_extract_dbscan(global_dtype):
    """Check DBSCAN-style extraction on four equally spread blobs."""
    # testing an easy dbscan case. Not including clusters with different
    # densities.
    rng = np.random.RandomState(0)
    n_per_blob = 20
    blob_centers = [[-5, -2], [4, -1], [1, 2], [-2, 3]]
    # Blobs are drawn sequentially from the same generator, in this order.
    blobs = [center + 0.2 * rng.randn(n_per_blob, 2) for center in blob_centers]
    X = np.vstack(blobs).astype(global_dtype, copy=False)

    labels = OPTICS(cluster_method="dbscan", eps=0.5).fit(X).labels_
    # `np.unique` returns the distinct labels in sorted order.
    assert_array_equal(np.unique(labels), [0, 1, 2, 3])


@pytest.mark.parametrize("csr_container", [None] + CSR_CONTAINERS)
def test_precomputed_dists(global_dtype, csr_container):
    """Check that precomputed distances give the same result as raw points."""
    subsample = X[::2].astype(global_dtype, copy=False)
    distances = pairwise_distances(subsample, metric="euclidean")
    if csr_container is not None:
        distances = csr_container(distances)

    shared_params = {"min_samples": 10, "algorithm": "brute"}
    with warnings.catch_warnings():
        # EfficiencyWarning is silenced for the precomputed fit only.
        warnings.simplefilter("ignore", EfficiencyWarning)
        from_distances = OPTICS(metric="precomputed", **shared_params).fit(distances)
    from_points = OPTICS(metric="euclidean", **shared_params).fit(subsample)

    assert_allclose(from_distances.reachability_, from_points.reachability_)
    assert_array_equal(from_distances.labels_, from_points.labels_)


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_optics_input_not_modified_precomputed_sparse_nodiag(csr_container):
    """Check that a precomputed sparse matrix is not modified in-place.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27508
    """
    dense = np.random.RandomState(0).rand(6, 6)
    # Zero the diagonal so that it is implicit once the matrix is sparse. If
    # the input were modified in-place, these zeros would become explicit.
    np.fill_diagonal(dense, 0)
    sparse_input = csr_container(dense)

    # No stored non-zero entry lies on the diagonal.
    rows, cols = sparse_input.nonzero()
    assert not np.any(rows == cols)

    snapshot = sparse_input.copy()
    OPTICS(metric="precomputed").fit(sparse_input)

    # The input must be unchanged, including the number of stored values
    # (no explicit zeros added).
    assert sparse_input.nnz == snapshot.nnz
    assert_array_equal(sparse_input.toarray(), snapshot.toarray())


def test_optics_predecessor_correction_ordering():
    """Check that cluster correction using predecessors works as expected.

    In the following example, the predecessor correction was not working
    properly since it was not using the right indices.

    This non-regression test checks that reordering the data does not change
    the results.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/26324
    """
    points = np.array([1, 2, 3, 1, 8, 8, 7, 100]).reshape(-1, 1)
    permutation = [0, 1, 2, 4, 5, 6, 7, 3]
    permuted_points = points[permutation]

    params = {"min_samples": 3, "metric": "euclidean"}
    labels = OPTICS(**params).fit(points).labels_
    permuted_labels = OPTICS(**params).fit(permuted_points).labels_

    # Permuting the original labels must give the labels of the permuted data.
    assert_array_equal(labels[permutation], permuted_labels)