File size: 2,567 Bytes
7885a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
{{py:

"""
Dataset abstractions for sequential data access.

Template file for easily generate fused types consistent code using Tempita
(https://github.com/cython/cython/blob/master/Cython/Tempita/_tempita.py).

Generated file: _seq_dataset.pxd

Each class is duplicated for all dtypes (float and double). The keywords
between double braces are substituted during the build.
"""

# name_suffix, c_type
dtypes = [('64', 'float64_t'),
          ('32', 'float32_t')]

}}
"""Dataset abstractions for sequential data access."""

from ._typedefs cimport float32_t, float64_t, intp_t, uint32_t

# SequentialDataset and its two concrete subclasses are (optionally randomized)
# iterators over the rows of a matrix X and corresponding target values y.

{{for name_suffix, c_type in dtypes}}

#------------------------------------------------------------------------------

cdef class SequentialDataset{{name_suffix}}:
    cdef int current_index
    cdef int[::1] index
    cdef int *index_data_ptr
    cdef Py_ssize_t n_samples
    cdef uint32_t seed

    cdef void shuffle(self, uint32_t seed) noexcept nogil
    cdef int _get_next_index(self) noexcept nogil
    cdef int _get_random_index(self) noexcept nogil

    cdef void _sample(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
                      int *nnz, {{c_type}} *y, {{c_type}} *sample_weight,
                      int current_index) noexcept nogil
    cdef void next(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
                   int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil
    cdef int random(self, {{c_type}} **x_data_ptr, int **x_ind_ptr,
                    int *nnz, {{c_type}} *y, {{c_type}} *sample_weight) noexcept nogil


cdef class ArrayDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
    cdef const {{c_type}}[:, ::1] X
    cdef const {{c_type}}[::1] Y
    cdef const {{c_type}}[::1] sample_weights
    cdef Py_ssize_t n_features
    cdef intp_t X_stride
    cdef {{c_type}} *X_data_ptr
    cdef {{c_type}} *Y_data_ptr
    cdef const int[::1] feature_indices
    cdef int *feature_indices_ptr
    cdef {{c_type}} *sample_weight_data


cdef class CSRDataset{{name_suffix}}(SequentialDataset{{name_suffix}}):
    cdef const {{c_type}}[::1] X_data
    cdef const int[::1] X_indptr
    cdef const int[::1] X_indices
    cdef const {{c_type}}[::1] Y
    cdef const {{c_type}}[::1] sample_weights
    cdef {{c_type}} *X_data_ptr
    cdef int *X_indptr_ptr
    cdef int *X_indices_ptr
    cdef {{c_type}} *Y_data_ptr
    cdef {{c_type}} *sample_weight_data

{{endfor}}