#line 1 "numpy/linalg/umath_linalg.c.src" /* ***************************************************************************** ** This file was autogenerated from a template DO NOT EDIT!!!! ** ** Changes should be made to the original source (.src) file ** ***************************************************************************** */ #line 1 /* -*- c -*- */ /* ***************************************************************************** ** INCLUDES ** ***************************************************************************** */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #include "Python.h" #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" #include "npy_pycompat.h" #include "npy_config.h" #include #include #include #include static const char* umath_linalg_version_string = "0.1.4"; /* **************************************************************************** * Debugging support * **************************************************************************** */ #define TRACE_TXT(...) do { fprintf (stderr, __VA_ARGS__); } while (0) #define STACK_TRACE do {} while (0) #define TRACE\ do { \ fprintf (stderr, \ "%s:%d:%s\n", \ __FILE__, \ __LINE__, \ __FUNCTION__); \ STACK_TRACE; \ } while (0) #if 0 #include void dbg_stack_trace() { void *trace[32]; size_t size; size = backtrace(trace, sizeof(trace)/sizeof(trace[0])); backtrace_symbols_fd(trace, size, 1); } #undef STACK_TRACE #define STACK_TRACE do { dbg_stack_trace(); } while (0) #endif /* ***************************************************************************** * BLAS/LAPACK calling macros * ***************************************************************************** */ #ifdef NO_APPEND_FORTRAN # define FNAME(x) x #else # define FNAME(x) x##_ #endif typedef struct { float r, i; } f2c_complex; typedef struct { double r, i; } f2c_doublecomplex; /* typedef long int (*L_fp)(); */ extern int FNAME(sgeev)(char *jobvl, char *jobvr, int *n, float a[], int *lda, float wr[], float wi[], float vl[], int *ldvl, float vr[], int *ldvr, float work[], int lwork[], int *info); extern int FNAME(dgeev)(char *jobvl, char *jobvr, int *n, double a[], int *lda, double wr[], double wi[], double vl[], int *ldvl, double vr[], int *ldvr, double work[], int lwork[], int *info); extern int FNAME(cgeev)(char *jobvl, char *jobvr, int *n, f2c_doublecomplex a[], int *lda, f2c_doublecomplex w[], f2c_doublecomplex vl[], int *ldvl, f2c_doublecomplex vr[], int *ldvr, f2c_doublecomplex work[], int *lwork, double rwork[], int *info); extern int FNAME(zgeev)(char *jobvl, char *jobvr, int *n, f2c_doublecomplex a[], int *lda, f2c_doublecomplex w[], f2c_doublecomplex vl[], int *ldvl, f2c_doublecomplex vr[], int *ldvr, f2c_doublecomplex work[], int *lwork, double rwork[], int *info); extern int FNAME(ssyevd)(char *jobz, char *uplo, int *n, float a[], int *lda, float w[], float work[], int *lwork, int iwork[], int *liwork, int *info); extern int FNAME(dsyevd)(char *jobz, char *uplo, int *n, double a[], int *lda, double w[], double work[], int *lwork, int iwork[], int *liwork, int *info); extern int FNAME(cheevd)(char *jobz, char *uplo, int *n, f2c_complex a[], int *lda, float w[], f2c_complex work[], int *lwork, float rwork[], int *lrwork, int iwork[], int *liwork, int *info); extern int FNAME(zheevd)(char *jobz, char *uplo, int *n, f2c_doublecomplex a[], int *lda, double w[], f2c_doublecomplex work[], int *lwork, double rwork[], int *lrwork, int iwork[], int *liwork, int *info); extern int FNAME(dgelsd)(int *m, int *n, int *nrhs, double a[], int *lda, double b[], int *ldb, double s[], double *rcond, int *rank, double work[], int *lwork, int iwork[], int *info); extern int FNAME(zgelsd)(int *m, int *n, int *nrhs, f2c_doublecomplex a[], int *lda, f2c_doublecomplex b[], int *ldb, double s[], double *rcond, int *rank, f2c_doublecomplex work[], int *lwork, double rwork[], int iwork[], int *info); extern int FNAME(sgesv)(int *n, int *nrhs, float a[], int *lda, int ipiv[], float b[], int *ldb, int *info); extern int FNAME(dgesv)(int *n, int *nrhs, double a[], int *lda, int ipiv[], double b[], int *ldb, int *info); extern int FNAME(cgesv)(int *n, int *nrhs, f2c_complex a[], int *lda, int ipiv[], f2c_complex b[], int *ldb, int *info); extern int FNAME(zgesv)(int *n, int *nrhs, f2c_doublecomplex a[], int *lda, int ipiv[], f2c_doublecomplex b[], int *ldb, int *info); extern int FNAME(sgetrf)(int *m, int *n, float a[], int *lda, int ipiv[], int *info); extern int FNAME(dgetrf)(int *m, int *n, double a[], int *lda, int ipiv[], int *info); extern int FNAME(cgetrf)(int *m, int *n, f2c_complex a[], int *lda, int ipiv[], int *info); extern int FNAME(zgetrf)(int *m, int *n, f2c_doublecomplex a[], int *lda, int ipiv[], int *info); extern int FNAME(spotrf)(char *uplo, int *n, float a[], int *lda, int *info); extern int FNAME(dpotrf)(char *uplo, int *n, double a[], int *lda, int *info); extern int FNAME(cpotrf)(char *uplo, int *n, f2c_complex a[], int *lda, int *info); extern int FNAME(zpotrf)(char *uplo, int *n, f2c_doublecomplex a[], int *lda, int *info); extern int FNAME(sgesdd)(char *jobz, int *m, int *n, float a[], int *lda, float s[], float u[], int *ldu, float vt[], int *ldvt, float work[], int *lwork, int iwork[], int *info); extern int FNAME(dgesdd)(char *jobz, int *m, int *n, double a[], int *lda, double s[], double u[], int *ldu, double vt[], int *ldvt, double work[], int *lwork, int iwork[], int *info); extern int FNAME(cgesdd)(char *jobz, int *m, int *n, f2c_complex a[], int *lda, float s[], f2c_complex u[], int *ldu, f2c_complex vt[], int *ldvt, f2c_complex work[], int *lwork, float rwork[], int iwork[], int *info); extern int FNAME(zgesdd)(char *jobz, int *m, int *n, f2c_doublecomplex a[], int *lda, double s[], f2c_doublecomplex u[], int *ldu, f2c_doublecomplex vt[], int *ldvt, f2c_doublecomplex work[], int *lwork, double rwork[], int iwork[], int *info); extern int FNAME(spotrs)(char *uplo, int *n, int *nrhs, float a[], int *lda, float b[], int *ldb, int *info); extern int FNAME(dpotrs)(char *uplo, int *n, int *nrhs, double a[], int *lda, double b[], int *ldb, int *info); extern int FNAME(cpotrs)(char *uplo, int *n, int *nrhs, f2c_complex a[], int *lda, f2c_complex b[], int *ldb, int *info); extern int FNAME(zpotrs)(char *uplo, int *n, int *nrhs, f2c_doublecomplex a[], int *lda, f2c_doublecomplex b[], int *ldb, int *info); extern int FNAME(spotri)(char *uplo, int *n, float a[], int *lda, int *info); extern int FNAME(dpotri)(char *uplo, int *n, double a[], int *lda, int *info); extern int FNAME(cpotri)(char *uplo, int *n, f2c_complex a[], int *lda, int *info); extern int FNAME(zpotri)(char *uplo, int *n, f2c_doublecomplex a[], int *lda, int *info); extern int FNAME(scopy)(int *n, float *sx, int *incx, float *sy, int *incy); extern int FNAME(dcopy)(int *n, double *sx, int *incx, double *sy, int *incy); extern int FNAME(ccopy)(int *n, f2c_complex *sx, int *incx, f2c_complex *sy, int *incy); extern int FNAME(zcopy)(int *n, f2c_doublecomplex *sx, int *incx, f2c_doublecomplex *sy, int *incy); extern float FNAME(sdot)(int *n, float *sx, int *incx, float *sy, int *incy); extern double FNAME(ddot)(int *n, double *sx, int *incx, double *sy, int *incy); extern f2c_complex FNAME(cdotu)(int *n, f2c_complex *sx, int *incx, f2c_complex *sy, int *incy); extern f2c_doublecomplex FNAME(zdotu)(int *n, f2c_doublecomplex *sx, int *incx, f2c_doublecomplex *sy, int *incy); extern f2c_complex FNAME(cdotc)(int *n, f2c_complex *sx, int *incx, f2c_complex *sy, int *incy); extern f2c_doublecomplex FNAME(zdotc)(int *n, f2c_doublecomplex *sx, int *incx, f2c_doublecomplex *sy, int *incy); extern int FNAME(sgemm)(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc); extern int FNAME(dgemm)(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc); extern int FNAME(cgemm)(char *transa, char *transb, int *m, int *n, int *k, f2c_complex *alpha, f2c_complex *a, int *lda, f2c_complex *b, int *ldb, f2c_complex *beta, f2c_complex *c, int *ldc); extern int FNAME(zgemm)(char *transa, char *transb, int *m, int *n, int *k, f2c_doublecomplex *alpha, f2c_doublecomplex *a, int *lda, f2c_doublecomplex *b, int *ldb, f2c_doublecomplex *beta, f2c_doublecomplex *c, int *ldc); #define LAPACK_T(FUNC) \ TRACE_TXT("Calling LAPACK ( " # FUNC " )\n"); \ FNAME(FUNC) #define BLAS(FUNC) \ FNAME(FUNC) #define LAPACK(FUNC) \ FNAME(FUNC) typedef int fortran_int; typedef float fortran_real; typedef double fortran_doublereal; typedef f2c_complex fortran_complex; typedef f2c_doublecomplex fortran_doublecomplex; /* ***************************************************************************** ** Some handy functions ** ***************************************************************************** */ static inline void * offset_ptr(void* ptr, ptrdiff_t offset) { return (void*)((npy_uint8*)ptr + offset); } static inline int get_fp_invalid_and_clear(void) { int status; status = npy_clear_floatstatus(); return !!(status & NPY_FPE_INVALID); } static inline void set_fp_invalid_or_clear(int error_occurred) { if (error_occurred) { npy_set_floatstatus_invalid(); } else { npy_clear_floatstatus(); } } /* ***************************************************************************** ** Some handy constants ** ***************************************************************************** */ #define UMATH_LINALG_MODULE_NAME "_umath_linalg" typedef union { fortran_complex f; npy_cfloat npy; float array[2]; } COMPLEX_t; typedef union { fortran_doublecomplex f; npy_cdouble npy; double array[2]; } DOUBLECOMPLEX_t; static float s_one; static float s_zero; static float s_minus_one; static float s_ninf; static float s_nan; static double d_one; static double d_zero; static double d_minus_one; static double d_ninf; static double d_nan; static COMPLEX_t c_one; static COMPLEX_t c_zero; static COMPLEX_t c_minus_one; static COMPLEX_t c_ninf; static COMPLEX_t c_nan; static DOUBLECOMPLEX_t z_one; static DOUBLECOMPLEX_t z_zero; static DOUBLECOMPLEX_t z_minus_one; static DOUBLECOMPLEX_t z_ninf; static DOUBLECOMPLEX_t z_nan; static void init_constants(void) { /* this is needed as NPY_INFINITY and NPY_NAN macros can't be used as initializers. I prefer to just set all the constants the same way. */ s_one = 1.0f; s_zero = 0.0f; s_minus_one = -1.0f; s_ninf = -NPY_INFINITYF; s_nan = NPY_NANF; d_one = 1.0; d_zero = 0.0; d_minus_one = -1.0; d_ninf = -NPY_INFINITY; d_nan = NPY_NAN; c_one.array[0] = 1.0f; c_one.array[1] = 0.0f; c_zero.array[0] = 0.0f; c_zero.array[1] = 0.0f; c_minus_one.array[0] = -1.0f; c_minus_one.array[1] = 0.0f; c_ninf.array[0] = -NPY_INFINITYF; c_ninf.array[1] = 0.0f; c_nan.array[0] = NPY_NANF; c_nan.array[1] = NPY_NANF; z_one.array[0] = 1.0; z_one.array[1] = 0.0; z_zero.array[0] = 0.0; z_zero.array[1] = 0.0; z_minus_one.array[0] = -1.0; z_minus_one.array[1] = 0.0; z_ninf.array[0] = -NPY_INFINITY; z_ninf.array[1] = 0.0; z_nan.array[0] = NPY_NAN; z_nan.array[1] = NPY_NAN; } /* ***************************************************************************** ** Structs used for data rearrangement ** ***************************************************************************** */ /* this struct contains information about how to linearize in a local buffer a matrix so that it can be used by blas functions. All strides are specified in number of elements (similar to what blas expects) dst_row_strides: number of elements between different row. Matrix is considered row-major dst_column_strides: number of elements between differnt columns in the destination buffer rows: number of rows of the matrix columns: number of columns of the matrix src_row_strides: strides needed to access the next row in the source matrix src_column_strides: strides needed to access the next column in the source matrix */ typedef struct linearize_data_struct { size_t rows; size_t columns; ptrdiff_t row_strides; ptrdiff_t column_strides; } LINEARIZE_DATA_t; static inline void init_linearize_data(LINEARIZE_DATA_t *lin_data, int rows, int columns, ptrdiff_t row_strides, ptrdiff_t column_strides) { lin_data->rows = rows; lin_data->columns = columns; lin_data->row_strides = row_strides; lin_data->column_strides = column_strides; } static inline void dump_ufunc_object(PyUFuncObject* ufunc) { TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", ufunc->core_enabled? "generalized ufunc" : "scalar ufunc", ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes); if (ufunc->core_enabled) { int arg; int dim; TRACE_TXT("\t%s (%d dimension(s) detected).\n", ufunc->core_signature, ufunc->core_num_dim_ix); for (arg = 0; arg < ufunc->nargs; arg++){ int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg]; TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (", arg, arg < ufunc->nin? "INPUT" : "OUTPUT", ufunc->core_num_dims[arg]); for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) { TRACE_TXT(" %d", arg_dim_ix[dim]); } TRACE_TXT(" )\n"); } } } static inline void dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) { TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ "\n\t\trow_strides: %td column_strides: %td"\ "\n", name, params->rows, params->columns, params->row_strides, params->column_strides); } static inline float FLOAT_add(float op1, float op2) { return op1 + op2; } static inline double DOUBLE_add(double op1, double op2) { return op1 + op2; } static inline COMPLEX_t CFLOAT_add(COMPLEX_t op1, COMPLEX_t op2) { COMPLEX_t result; result.array[0] = op1.array[0] + op2.array[0]; result.array[1] = op1.array[1] + op2.array[1]; return result; } static inline DOUBLECOMPLEX_t CDOUBLE_add(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) { DOUBLECOMPLEX_t result; result.array[0] = op1.array[0] + op2.array[0]; result.array[1] = op1.array[1] + op2.array[1]; return result; } static inline float FLOAT_mul(float op1, float op2) { return op1*op2; } static inline double DOUBLE_mul(double op1, double op2) { return op1*op2; } static inline COMPLEX_t CFLOAT_mul(COMPLEX_t op1, COMPLEX_t op2) { COMPLEX_t result; result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; return result; } static inline DOUBLECOMPLEX_t CDOUBLE_mul(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) { DOUBLECOMPLEX_t result; result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; return result; } static inline float FLOAT_mulc(float op1, float op2) { return op1*op2; } static inline double DOUBLE_mulc(float op1, float op2) { return op1*op2; } static inline COMPLEX_t CFLOAT_mulc(COMPLEX_t op1, COMPLEX_t op2) { COMPLEX_t result; result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; return result; } static inline DOUBLECOMPLEX_t CDOUBLE_mulc(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) { DOUBLECOMPLEX_t result; result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; return result; } static inline void print_FLOAT(npy_float s) { TRACE_TXT(" %8.4f", s); } static inline void print_DOUBLE(npy_double d) { TRACE_TXT(" %10.6f", d); } static inline void print_CFLOAT(npy_cfloat c) { float* c_parts = (float*)&c; TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); } static inline void print_CDOUBLE(npy_cdouble z) { double* z_parts = (double*)&z; TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); } #line 681 static inline void dump_FLOAT_matrix(const char* name, size_t rows, size_t columns, const npy_float* ptr) { size_t i,j; TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); for (i=0; icolumns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(float)); fortran_int one = 1; for (i=0; i< data->rows; i++) { if (column_strides > 0) { FNAME(scopy)(&columns, (void*)src, &column_strides, (void*)dst, &one); } else if (column_strides < 0) { FNAME(scopy)(&columns, (void*)((float*)src + (columns-1)*column_strides), &column_strides, (void*)dst, &one); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ for (j = 0; j < columns; ++j) { memcpy((float*)dst + j, (float*)src, sizeof(float)); } } src += data->row_strides/sizeof(float); dst += data->columns; } return rv; } else { return src; } } static inline void * delinearize_FLOAT_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { float *src = (float *) src_in; float *dst = (float *) dst_in; if (src) { int i; float *rv = src; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(float)); fortran_int one = 1; for (i=0; i < data->rows; i++) { if (column_strides > 0) { FNAME(scopy)(&columns, (void*)src, &one, (void*)dst, &column_strides); } else if (column_strides < 0) { FNAME(scopy)(&columns, (void*)src, &one, (void*)((float*)dst + (columns-1)*column_strides), &column_strides); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ if (columns > 0) { memcpy((float*)dst, (float*)src + (columns-1), sizeof(float)); } } src += data->columns; dst += data->row_strides/sizeof(float); } return rv; } else { return src; } } static inline void nan_FLOAT_matrix(void *dst_in, const LINEARIZE_DATA_t* data) { float *dst = (float *) dst_in; int i,j; for (i=0; i < data->rows; i++) { float *cp = dst; ptrdiff_t cs = data->column_strides/sizeof(float); for (j=0; j< data->columns; ++j) { *cp = s_nan; cp += cs; } dst += data->row_strides/sizeof(float); } } #line 806 static inline void * linearize_DOUBLE_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { double *src = (double *) src_in; double *dst = (double *) dst_in; if (dst) { int i, j; double* rv = dst; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(double)); fortran_int one = 1; for (i=0; i< data->rows; i++) { if (column_strides > 0) { FNAME(dcopy)(&columns, (void*)src, &column_strides, (void*)dst, &one); } else if (column_strides < 0) { FNAME(dcopy)(&columns, (void*)((double*)src + (columns-1)*column_strides), &column_strides, (void*)dst, &one); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ for (j = 0; j < columns; ++j) { memcpy((double*)dst + j, (double*)src, sizeof(double)); } } src += data->row_strides/sizeof(double); dst += data->columns; } return rv; } else { return src; } } static inline void * delinearize_DOUBLE_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { double *src = (double *) src_in; double *dst = (double *) dst_in; if (src) { int i; double *rv = src; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(double)); fortran_int one = 1; for (i=0; i < data->rows; i++) { if (column_strides > 0) { FNAME(dcopy)(&columns, (void*)src, &one, (void*)dst, &column_strides); } else if (column_strides < 0) { FNAME(dcopy)(&columns, (void*)src, &one, (void*)((double*)dst + (columns-1)*column_strides), &column_strides); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ if (columns > 0) { memcpy((double*)dst, (double*)src + (columns-1), sizeof(double)); } } src += data->columns; dst += data->row_strides/sizeof(double); } return rv; } else { return src; } } static inline void nan_DOUBLE_matrix(void *dst_in, const LINEARIZE_DATA_t* data) { double *dst = (double *) dst_in; int i,j; for (i=0; i < data->rows; i++) { double *cp = dst; ptrdiff_t cs = data->column_strides/sizeof(double); for (j=0; j< data->columns; ++j) { *cp = d_nan; cp += cs; } dst += data->row_strides/sizeof(double); } } #line 806 static inline void * linearize_CFLOAT_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { COMPLEX_t *src = (COMPLEX_t *) src_in; COMPLEX_t *dst = (COMPLEX_t *) dst_in; if (dst) { int i, j; COMPLEX_t* rv = dst; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(COMPLEX_t)); fortran_int one = 1; for (i=0; i< data->rows; i++) { if (column_strides > 0) { FNAME(ccopy)(&columns, (void*)src, &column_strides, (void*)dst, &one); } else if (column_strides < 0) { FNAME(ccopy)(&columns, (void*)((COMPLEX_t*)src + (columns-1)*column_strides), &column_strides, (void*)dst, &one); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ for (j = 0; j < columns; ++j) { memcpy((COMPLEX_t*)dst + j, (COMPLEX_t*)src, sizeof(COMPLEX_t)); } } src += data->row_strides/sizeof(COMPLEX_t); dst += data->columns; } return rv; } else { return src; } } static inline void * delinearize_CFLOAT_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { COMPLEX_t *src = (COMPLEX_t *) src_in; COMPLEX_t *dst = (COMPLEX_t *) dst_in; if (src) { int i; COMPLEX_t *rv = src; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(COMPLEX_t)); fortran_int one = 1; for (i=0; i < data->rows; i++) { if (column_strides > 0) { FNAME(ccopy)(&columns, (void*)src, &one, (void*)dst, &column_strides); } else if (column_strides < 0) { FNAME(ccopy)(&columns, (void*)src, &one, (void*)((COMPLEX_t*)dst + (columns-1)*column_strides), &column_strides); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ if (columns > 0) { memcpy((COMPLEX_t*)dst, (COMPLEX_t*)src + (columns-1), sizeof(COMPLEX_t)); } } src += data->columns; dst += data->row_strides/sizeof(COMPLEX_t); } return rv; } else { return src; } } static inline void nan_CFLOAT_matrix(void *dst_in, const LINEARIZE_DATA_t* data) { COMPLEX_t *dst = (COMPLEX_t *) dst_in; int i,j; for (i=0; i < data->rows; i++) { COMPLEX_t *cp = dst; ptrdiff_t cs = data->column_strides/sizeof(COMPLEX_t); for (j=0; j< data->columns; ++j) { *cp = c_nan; cp += cs; } dst += data->row_strides/sizeof(COMPLEX_t); } } #line 806 static inline void * linearize_CDOUBLE_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { DOUBLECOMPLEX_t *src = (DOUBLECOMPLEX_t *) src_in; DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; if (dst) { int i, j; DOUBLECOMPLEX_t* rv = dst; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(DOUBLECOMPLEX_t)); fortran_int one = 1; for (i=0; i< data->rows; i++) { if (column_strides > 0) { FNAME(zcopy)(&columns, (void*)src, &column_strides, (void*)dst, &one); } else if (column_strides < 0) { FNAME(zcopy)(&columns, (void*)((DOUBLECOMPLEX_t*)src + (columns-1)*column_strides), &column_strides, (void*)dst, &one); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ for (j = 0; j < columns; ++j) { memcpy((DOUBLECOMPLEX_t*)dst + j, (DOUBLECOMPLEX_t*)src, sizeof(DOUBLECOMPLEX_t)); } } src += data->row_strides/sizeof(DOUBLECOMPLEX_t); dst += data->columns; } return rv; } else { return src; } } static inline void * delinearize_CDOUBLE_matrix(void *dst_in, void *src_in, const LINEARIZE_DATA_t* data) { DOUBLECOMPLEX_t *src = (DOUBLECOMPLEX_t *) src_in; DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; if (src) { int i; DOUBLECOMPLEX_t *rv = src; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = (fortran_int)(data->column_strides/sizeof(DOUBLECOMPLEX_t)); fortran_int one = 1; for (i=0; i < data->rows; i++) { if (column_strides > 0) { FNAME(zcopy)(&columns, (void*)src, &one, (void*)dst, &column_strides); } else if (column_strides < 0) { FNAME(zcopy)(&columns, (void*)src, &one, (void*)((DOUBLECOMPLEX_t*)dst + (columns-1)*column_strides), &column_strides); } else { /* * Zero stride has undefined behavior in some BLAS * implementations (e.g. OSX Accelerate), so do it * manually */ if (columns > 0) { memcpy((DOUBLECOMPLEX_t*)dst, (DOUBLECOMPLEX_t*)src + (columns-1), sizeof(DOUBLECOMPLEX_t)); } } src += data->columns; dst += data->row_strides/sizeof(DOUBLECOMPLEX_t); } return rv; } else { return src; } } static inline void nan_CDOUBLE_matrix(void *dst_in, const LINEARIZE_DATA_t* data) { DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; int i,j; for (i=0; i < data->rows; i++) { DOUBLECOMPLEX_t *cp = dst; ptrdiff_t cs = data->column_strides/sizeof(DOUBLECOMPLEX_t); for (j=0; j< data->columns; ++j) { *cp = z_nan; cp += cs; } dst += data->row_strides/sizeof(DOUBLECOMPLEX_t); } } /* identity square matrix generation */ #line 924 static inline void identity_FLOAT_matrix(void *ptr, size_t n) { size_t i; float *matrix = (float*) ptr; /* in IEEE floating point, zeroes are represented as bitwise 0 */ memset(matrix, 0, n*n*sizeof(float)); for (i = 0; i < n; ++i) { *matrix = s_one; matrix += n+1; } } #line 924 static inline void identity_DOUBLE_matrix(void *ptr, size_t n) { size_t i; double *matrix = (double*) ptr; /* in IEEE floating point, zeroes are represented as bitwise 0 */ memset(matrix, 0, n*n*sizeof(double)); for (i = 0; i < n; ++i) { *matrix = d_one; matrix += n+1; } } #line 924 static inline void identity_CFLOAT_matrix(void *ptr, size_t n) { size_t i; COMPLEX_t *matrix = (COMPLEX_t*) ptr; /* in IEEE floating point, zeroes are represented as bitwise 0 */ memset(matrix, 0, n*n*sizeof(COMPLEX_t)); for (i = 0; i < n; ++i) { *matrix = c_one; matrix += n+1; } } #line 924 static inline void identity_CDOUBLE_matrix(void *ptr, size_t n) { size_t i; DOUBLECOMPLEX_t *matrix = (DOUBLECOMPLEX_t*) ptr; /* in IEEE floating point, zeroes are represented as bitwise 0 */ memset(matrix, 0, n*n*sizeof(DOUBLECOMPLEX_t)); for (i = 0; i < n; ++i) { *matrix = z_one; matrix += n+1; } } /* lower/upper triangular matrix using blas (in place) */ #line 947 static inline void triu_FLOAT_matrix(void *ptr, size_t n) { size_t i,j; float *matrix = (float*)ptr; matrix += n; for (i=1; i < n; ++i) { for (j=0; jA = a; params->W = w; params->WORK = work; params->RWORK = NULL; /* unused */ params->IWORK = iwork; params->N = N; params->LWORK = lwork; params->LRWORK = 0; /* unused */ params->LIWORK = liwork; params->JOBZ = JOBZ; params->UPLO = UPLO; return 1; error: /* something failed */ memset(params, 0, sizeof(*params)); free(mem_buff2); free(mem_buff); return 0; } static inline fortran_int call_ssyevd(EIGH_PARAMS_t *params) { fortran_int rv; LAPACK(ssyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, params->A, ¶ms->N, params->W, params->WORK, ¶ms->LWORK, params->IWORK, ¶ms->LIWORK, &rv); return rv; } #line 1238 /* * Initialize the parameters to use in for the lapack function _syevd * Handles buffer allocation */ static inline int init_dsyevd(EIGH_PARAMS_t* params, char JOBZ, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_double query_work_size; fortran_int query_iwork_size; fortran_int lwork = -1; fortran_int liwork = -1; fortran_int info; npy_uint8 *a, *w, *work, *iwork; size_t alloc_size = N*(N+1)*sizeof(npy_double); mem_buff = malloc(alloc_size); if (!mem_buff) goto error; a = mem_buff; w = mem_buff + N*N*sizeof(npy_double); LAPACK(dsyevd)(&JOBZ, &UPLO, &N, (fortran_doublereal*)a, &N, (fortran_doublereal*)w, &query_work_size, &lwork, &query_iwork_size, &liwork, &info); if (info != 0) goto error; work = mem_buff; lwork = (fortran_int)query_work_size; liwork = query_iwork_size; mem_buff2 = malloc(lwork*sizeof(npy_double) + liwork*sizeof(fortran_int)); if (!mem_buff2) goto error; work = mem_buff2; iwork = mem_buff2 + lwork*sizeof(npy_double); params->A = a; params->W = w; params->WORK = work; params->RWORK = NULL; /* unused */ params->IWORK = iwork; params->N = N; params->LWORK = lwork; params->LRWORK = 0; /* unused */ params->LIWORK = liwork; params->JOBZ = JOBZ; params->UPLO = UPLO; return 1; error: /* something failed */ memset(params, 0, sizeof(*params)); free(mem_buff2); free(mem_buff); return 0; } static inline fortran_int call_dsyevd(EIGH_PARAMS_t *params) { fortran_int rv; LAPACK(dsyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, params->A, ¶ms->N, params->W, params->WORK, ¶ms->LWORK, params->IWORK, ¶ms->LIWORK, &rv); return rv; } #line 1327 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_cheevd(EIGH_PARAMS_t *params, char JOBZ, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; fortran_complex query_work_size; fortran_real query_rwork_size; fortran_int query_iwork_size; fortran_int lwork = -1; fortran_int lrwork = -1; fortran_int liwork = -1; npy_uint8 *a, *w, *work, *rwork, *iwork; fortran_int info; mem_buff = malloc(N*N*sizeof(npy_cfloat)+N*sizeof(npy_float)); if (!mem_buff) goto error; a = mem_buff; w = mem_buff+N*N*sizeof(npy_cfloat); LAPACK(cheevd)(&JOBZ, &UPLO, &N, (fortran_complex*)a, &N, (fortran_real*)w, &query_work_size, &lwork, &query_rwork_size, &lrwork, &query_iwork_size, &liwork, &info); if (info != 0) goto error; lwork = (fortran_int)*(fortran_real*)&query_work_size; lrwork = (fortran_int)query_rwork_size; liwork = query_iwork_size; mem_buff2 = malloc(lwork*sizeof(npy_cfloat) + lrwork*sizeof(npy_float) + liwork*sizeof(fortran_int)); if (!mem_buff2) goto error; work = mem_buff2; rwork = work + lwork*sizeof(npy_cfloat); iwork = rwork + lrwork*sizeof(npy_float); params->A = a; params->W = w; params->WORK = work; params->RWORK = rwork; params->IWORK = iwork; params->N = N; params->LWORK = lwork; params->LRWORK = lrwork; params->LIWORK = liwork; params->JOBZ = JOBZ; params->UPLO = UPLO; return 1; /* something failed */ error: memset(params, 0, sizeof(*params)); free(mem_buff2); free(mem_buff); return 0; } static inline fortran_int call_cheevd(EIGH_PARAMS_t *params) { fortran_int rv; LAPACK(cheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, params->A, ¶ms->N, params->W, params->WORK, ¶ms->LWORK, params->RWORK, ¶ms->LRWORK, params->IWORK, ¶ms->LIWORK, &rv); return rv; } #line 1327 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_zheevd(EIGH_PARAMS_t *params, char JOBZ, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; fortran_doublecomplex query_work_size; fortran_doublereal query_rwork_size; fortran_int query_iwork_size; fortran_int lwork = -1; fortran_int lrwork = -1; fortran_int liwork = -1; npy_uint8 *a, *w, *work, *rwork, *iwork; fortran_int info; mem_buff = malloc(N*N*sizeof(npy_cdouble)+N*sizeof(npy_double)); if (!mem_buff) goto error; a = mem_buff; w = mem_buff+N*N*sizeof(npy_cdouble); LAPACK(zheevd)(&JOBZ, &UPLO, &N, (fortran_doublecomplex*)a, &N, (fortran_doublereal*)w, &query_work_size, &lwork, &query_rwork_size, &lrwork, &query_iwork_size, &liwork, &info); if (info != 0) goto error; lwork = (fortran_int)*(fortran_doublereal*)&query_work_size; lrwork = (fortran_int)query_rwork_size; liwork = query_iwork_size; mem_buff2 = malloc(lwork*sizeof(npy_cdouble) + lrwork*sizeof(npy_double) + liwork*sizeof(fortran_int)); if (!mem_buff2) goto error; work = mem_buff2; rwork = work + lwork*sizeof(npy_cdouble); iwork = rwork + lrwork*sizeof(npy_double); params->A = a; params->W = w; params->WORK = work; params->RWORK = rwork; params->IWORK = iwork; params->N = N; params->LWORK = lwork; params->LRWORK = lrwork; params->LIWORK = liwork; params->JOBZ = JOBZ; params->UPLO = UPLO; return 1; /* something failed */ error: memset(params, 0, sizeof(*params)); free(mem_buff2); free(mem_buff); return 0; } static inline fortran_int call_zheevd(EIGH_PARAMS_t *params) { fortran_int rv; LAPACK(zheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, params->A, ¶ms->N, params->W, params->WORK, ¶ms->LWORK, params->RWORK, ¶ms->LRWORK, params->IWORK, ¶ms->LIWORK, &rv); return rv; } #line 1421 /* * (M,M)->(M,)(M,M) * dimensions[1] -> M * args[0] -> A[in] * args[1] -> W * args[2] -> A[out] */ static inline void release_ssyevd(EIGH_PARAMS_t *params) { /* allocated memory in A and WORK */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void FLOAT_eigh_wrapper(char JOBZ, char UPLO, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[3]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:3; EIGH_PARAMS_t eigh_params; int error_occurred = get_fp_invalid_and_clear(); for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_ssyevd(&eigh_params, JOBZ, UPLO, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t matrix_in_ld; LINEARIZE_DATA_t eigenvectors_out_ld; LINEARIZE_DATA_t eigenvalues_out_ld; init_linearize_data(&matrix_in_ld, eigh_params.N, eigh_params.N, steps[1], steps[0]); init_linearize_data(&eigenvalues_out_ld, 1, eigh_params.N, 0, steps[2]); if ('V' == eigh_params.JOBZ) { init_linearize_data(&eigenvectors_out_ld, eigh_params.N, eigh_params.N, steps[4], steps[3]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_FLOAT_matrix(eigh_params.A, args[0], &matrix_in_ld); not_ok = call_ssyevd(&eigh_params); if (!not_ok) { /* lapack ok, copy result out */ delinearize_FLOAT_matrix(args[1], eigh_params.W, &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { delinearize_FLOAT_matrix(args[2], eigh_params.A, &eigenvectors_out_ld); } } else { /* lapack fail, set result to nan */ error_occurred = 1; nan_FLOAT_matrix(args[1], &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { nan_FLOAT_matrix(args[2], &eigenvectors_out_ld); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_ssyevd(&eigh_params); } set_fp_invalid_or_clear(error_occurred); } #line 1421 /* * (M,M)->(M,)(M,M) * dimensions[1] -> M * args[0] -> A[in] * args[1] -> W * args[2] -> A[out] */ static inline void release_dsyevd(EIGH_PARAMS_t *params) { /* allocated memory in A and WORK */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void DOUBLE_eigh_wrapper(char JOBZ, char UPLO, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[3]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:3; EIGH_PARAMS_t eigh_params; int error_occurred = get_fp_invalid_and_clear(); for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_dsyevd(&eigh_params, JOBZ, UPLO, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t matrix_in_ld; LINEARIZE_DATA_t eigenvectors_out_ld; LINEARIZE_DATA_t eigenvalues_out_ld; init_linearize_data(&matrix_in_ld, eigh_params.N, eigh_params.N, steps[1], steps[0]); init_linearize_data(&eigenvalues_out_ld, 1, eigh_params.N, 0, steps[2]); if ('V' == eigh_params.JOBZ) { init_linearize_data(&eigenvectors_out_ld, eigh_params.N, eigh_params.N, steps[4], steps[3]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_DOUBLE_matrix(eigh_params.A, args[0], &matrix_in_ld); not_ok = call_dsyevd(&eigh_params); if (!not_ok) { /* lapack ok, copy result out */ delinearize_DOUBLE_matrix(args[1], eigh_params.W, &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { delinearize_DOUBLE_matrix(args[2], eigh_params.A, &eigenvectors_out_ld); } } else { /* lapack fail, set result to nan */ error_occurred = 1; nan_DOUBLE_matrix(args[1], &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { nan_DOUBLE_matrix(args[2], &eigenvectors_out_ld); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_dsyevd(&eigh_params); } set_fp_invalid_or_clear(error_occurred); } #line 1421 /* * (M,M)->(M,)(M,M) * dimensions[1] -> M * args[0] -> A[in] * args[1] -> W * args[2] -> A[out] */ static inline void release_cheevd(EIGH_PARAMS_t *params) { /* allocated memory in A and WORK */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void CFLOAT_eigh_wrapper(char JOBZ, char UPLO, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[3]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:3; EIGH_PARAMS_t eigh_params; int error_occurred = get_fp_invalid_and_clear(); for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_cheevd(&eigh_params, JOBZ, UPLO, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t matrix_in_ld; LINEARIZE_DATA_t eigenvectors_out_ld; LINEARIZE_DATA_t eigenvalues_out_ld; init_linearize_data(&matrix_in_ld, eigh_params.N, eigh_params.N, steps[1], steps[0]); init_linearize_data(&eigenvalues_out_ld, 1, eigh_params.N, 0, steps[2]); if ('V' == eigh_params.JOBZ) { init_linearize_data(&eigenvectors_out_ld, eigh_params.N, eigh_params.N, steps[4], steps[3]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_CFLOAT_matrix(eigh_params.A, args[0], &matrix_in_ld); not_ok = call_cheevd(&eigh_params); if (!not_ok) { /* lapack ok, copy result out */ delinearize_FLOAT_matrix(args[1], eigh_params.W, &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { delinearize_CFLOAT_matrix(args[2], eigh_params.A, &eigenvectors_out_ld); } } else { /* lapack fail, set result to nan */ error_occurred = 1; nan_FLOAT_matrix(args[1], &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { nan_CFLOAT_matrix(args[2], &eigenvectors_out_ld); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_cheevd(&eigh_params); } set_fp_invalid_or_clear(error_occurred); } #line 1421 /* * (M,M)->(M,)(M,M) * dimensions[1] -> M * args[0] -> A[in] * args[1] -> W * args[2] -> A[out] */ static inline void release_zheevd(EIGH_PARAMS_t *params) { /* allocated memory in A and WORK */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void CDOUBLE_eigh_wrapper(char JOBZ, char UPLO, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[3]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:3; EIGH_PARAMS_t eigh_params; int error_occurred = get_fp_invalid_and_clear(); for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_zheevd(&eigh_params, JOBZ, UPLO, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t matrix_in_ld; LINEARIZE_DATA_t eigenvectors_out_ld; LINEARIZE_DATA_t eigenvalues_out_ld; init_linearize_data(&matrix_in_ld, eigh_params.N, eigh_params.N, steps[1], steps[0]); init_linearize_data(&eigenvalues_out_ld, 1, eigh_params.N, 0, steps[2]); if ('V' == eigh_params.JOBZ) { init_linearize_data(&eigenvectors_out_ld, eigh_params.N, eigh_params.N, steps[4], steps[3]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_CDOUBLE_matrix(eigh_params.A, args[0], &matrix_in_ld); not_ok = call_zheevd(&eigh_params); if (!not_ok) { /* lapack ok, copy result out */ delinearize_DOUBLE_matrix(args[1], eigh_params.W, &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { delinearize_CDOUBLE_matrix(args[2], eigh_params.A, &eigenvectors_out_ld); } } else { /* lapack fail, set result to nan */ error_occurred = 1; nan_DOUBLE_matrix(args[1], &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { nan_CDOUBLE_matrix(args[2], &eigenvectors_out_ld); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_zheevd(&eigh_params); } set_fp_invalid_or_clear(error_occurred); } #line 1516 static void FLOAT_eighlo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_eigh_wrapper('V', 'L', args, dimensions, steps); } static void FLOAT_eighup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { FLOAT_eigh_wrapper('V', 'U', args, dimensions, steps); } static void FLOAT_eigvalshlo(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { FLOAT_eigh_wrapper('N', 'L', args, dimensions, steps); } static void FLOAT_eigvalshup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { FLOAT_eigh_wrapper('N', 'U', args, dimensions, steps); } #line 1516 static void DOUBLE_eighlo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_eigh_wrapper('V', 'L', args, dimensions, steps); } static void DOUBLE_eighup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { DOUBLE_eigh_wrapper('V', 'U', args, dimensions, steps); } static void DOUBLE_eigvalshlo(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { DOUBLE_eigh_wrapper('N', 'L', args, dimensions, steps); } static void DOUBLE_eigvalshup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { DOUBLE_eigh_wrapper('N', 'U', args, dimensions, steps); } #line 1516 static void CFLOAT_eighlo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CFLOAT_eigh_wrapper('V', 'L', args, dimensions, steps); } static void CFLOAT_eighup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CFLOAT_eigh_wrapper('V', 'U', args, dimensions, steps); } static void CFLOAT_eigvalshlo(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CFLOAT_eigh_wrapper('N', 'L', args, dimensions, steps); } static void CFLOAT_eigvalshup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CFLOAT_eigh_wrapper('N', 'U', args, dimensions, steps); } #line 1516 static void CDOUBLE_eighlo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_eigh_wrapper('V', 'L', args, dimensions, steps); } static void CDOUBLE_eighup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CDOUBLE_eigh_wrapper('V', 'U', args, dimensions, steps); } static void CDOUBLE_eigvalshlo(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CDOUBLE_eigh_wrapper('N', 'L', args, dimensions, steps); } static void CDOUBLE_eigvalshup(char **args, npy_intp *dimensions, npy_intp *steps, void* NPY_UNUSED(func)) { CDOUBLE_eigh_wrapper('N', 'U', args, dimensions, steps); } /* -------------------------------------------------------------------------- */ /* Solve family (includes inv) */ typedef struct gesv_params_struct { void *A; /* A is (N,N) of base type */ void *B; /* B is (N,NRHS) of base type */ fortran_int * IPIV; /* IPIV is (N) */ fortran_int N; fortran_int NRHS; fortran_int LDA; fortran_int LDB; } GESV_PARAMS_t; #line 1574 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_sgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) { npy_uint8 *mem_buff = NULL; npy_uint8 *a, *b, *ipiv; mem_buff = malloc(N*N*sizeof(fortran_real) + N*NRHS*sizeof(fortran_real) + N*sizeof(fortran_int)); if (!mem_buff) goto error; a = mem_buff; b = a + N*N*sizeof(fortran_real); ipiv = b + N*NRHS*sizeof(fortran_real); params->A = a; params->B = b; params->IPIV = (fortran_int*)ipiv; params->N = N; params->NRHS = NRHS; params->LDA = N; params->LDB = N; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_sgesv(GESV_PARAMS_t *params) { /* memory block base is in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_sgesv(GESV_PARAMS_t *params) { fortran_int rv; LAPACK(sgesv)(¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->IPIV, params->B, ¶ms->LDB, &rv); return rv; } static void FLOAT_solve(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n, nrhs; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; nrhs = (fortran_int)dimensions[1]; if (init_sgesv(¶ms, n, nrhs)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_FLOAT_matrix(params.A, args[0], &a_in); linearize_FLOAT_matrix(params.B, args[1], &b_in); not_ok =call_sgesv(¶ms); if (!not_ok) { delinearize_FLOAT_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_FLOAT_matrix(args[2], &r_out); } END_OUTER_LOOP release_sgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void FLOAT_solve1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; if (init_sgesv(¶ms, n, 1)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, 1, n, 1, steps[2]); init_linearize_data(&r_out, 1, n, 1, steps[3]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_FLOAT_matrix(params.A, args[0], &a_in); linearize_FLOAT_matrix(params.B, args[1], &b_in); not_ok = call_sgesv(¶ms); if (!not_ok) { delinearize_FLOAT_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_FLOAT_matrix(args[2], &r_out); } END_OUTER_LOOP release_sgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void FLOAT_inv(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_2 n = (fortran_int)dimensions[0]; if (init_sgesv(¶ms, n, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_FLOAT_matrix(params.A, args[0], &a_in); identity_FLOAT_matrix(params.B, n); not_ok = call_sgesv(¶ms); if (!not_ok) { delinearize_FLOAT_matrix(args[1], params.B, &r_out); } else { error_occurred = 1; nan_FLOAT_matrix(args[1], &r_out); } END_OUTER_LOOP release_sgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } #line 1574 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_dgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) { npy_uint8 *mem_buff = NULL; npy_uint8 *a, *b, *ipiv; mem_buff = malloc(N*N*sizeof(fortran_doublereal) + N*NRHS*sizeof(fortran_doublereal) + N*sizeof(fortran_int)); if (!mem_buff) goto error; a = mem_buff; b = a + N*N*sizeof(fortran_doublereal); ipiv = b + N*NRHS*sizeof(fortran_doublereal); params->A = a; params->B = b; params->IPIV = (fortran_int*)ipiv; params->N = N; params->NRHS = NRHS; params->LDA = N; params->LDB = N; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_dgesv(GESV_PARAMS_t *params) { /* memory block base is in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_dgesv(GESV_PARAMS_t *params) { fortran_int rv; LAPACK(dgesv)(¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->IPIV, params->B, ¶ms->LDB, &rv); return rv; } static void DOUBLE_solve(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n, nrhs; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; nrhs = (fortran_int)dimensions[1]; if (init_dgesv(¶ms, n, nrhs)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_DOUBLE_matrix(params.A, args[0], &a_in); linearize_DOUBLE_matrix(params.B, args[1], &b_in); not_ok =call_dgesv(¶ms); if (!not_ok) { delinearize_DOUBLE_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_DOUBLE_matrix(args[2], &r_out); } END_OUTER_LOOP release_dgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void DOUBLE_solve1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; if (init_dgesv(¶ms, n, 1)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, 1, n, 1, steps[2]); init_linearize_data(&r_out, 1, n, 1, steps[3]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_DOUBLE_matrix(params.A, args[0], &a_in); linearize_DOUBLE_matrix(params.B, args[1], &b_in); not_ok = call_dgesv(¶ms); if (!not_ok) { delinearize_DOUBLE_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_DOUBLE_matrix(args[2], &r_out); } END_OUTER_LOOP release_dgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void DOUBLE_inv(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_2 n = (fortran_int)dimensions[0]; if (init_dgesv(¶ms, n, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_DOUBLE_matrix(params.A, args[0], &a_in); identity_DOUBLE_matrix(params.B, n); not_ok = call_dgesv(¶ms); if (!not_ok) { delinearize_DOUBLE_matrix(args[1], params.B, &r_out); } else { error_occurred = 1; nan_DOUBLE_matrix(args[1], &r_out); } END_OUTER_LOOP release_dgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } #line 1574 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_cgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) { npy_uint8 *mem_buff = NULL; npy_uint8 *a, *b, *ipiv; mem_buff = malloc(N*N*sizeof(fortran_complex) + N*NRHS*sizeof(fortran_complex) + N*sizeof(fortran_int)); if (!mem_buff) goto error; a = mem_buff; b = a + N*N*sizeof(fortran_complex); ipiv = b + N*NRHS*sizeof(fortran_complex); params->A = a; params->B = b; params->IPIV = (fortran_int*)ipiv; params->N = N; params->NRHS = NRHS; params->LDA = N; params->LDB = N; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_cgesv(GESV_PARAMS_t *params) { /* memory block base is in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_cgesv(GESV_PARAMS_t *params) { fortran_int rv; LAPACK(cgesv)(¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->IPIV, params->B, ¶ms->LDB, &rv); return rv; } static void CFLOAT_solve(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n, nrhs; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; nrhs = (fortran_int)dimensions[1]; if (init_cgesv(¶ms, n, nrhs)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_CFLOAT_matrix(params.A, args[0], &a_in); linearize_CFLOAT_matrix(params.B, args[1], &b_in); not_ok =call_cgesv(¶ms); if (!not_ok) { delinearize_CFLOAT_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_CFLOAT_matrix(args[2], &r_out); } END_OUTER_LOOP release_cgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CFLOAT_solve1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; if (init_cgesv(¶ms, n, 1)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, 1, n, 1, steps[2]); init_linearize_data(&r_out, 1, n, 1, steps[3]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_CFLOAT_matrix(params.A, args[0], &a_in); linearize_CFLOAT_matrix(params.B, args[1], &b_in); not_ok = call_cgesv(¶ms); if (!not_ok) { delinearize_CFLOAT_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_CFLOAT_matrix(args[2], &r_out); } END_OUTER_LOOP release_cgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CFLOAT_inv(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_2 n = (fortran_int)dimensions[0]; if (init_cgesv(¶ms, n, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_CFLOAT_matrix(params.A, args[0], &a_in); identity_CFLOAT_matrix(params.B, n); not_ok = call_cgesv(¶ms); if (!not_ok) { delinearize_CFLOAT_matrix(args[1], params.B, &r_out); } else { error_occurred = 1; nan_CFLOAT_matrix(args[1], &r_out); } END_OUTER_LOOP release_cgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } #line 1574 /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ static inline int init_zgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) { npy_uint8 *mem_buff = NULL; npy_uint8 *a, *b, *ipiv; mem_buff = malloc(N*N*sizeof(fortran_doublecomplex) + N*NRHS*sizeof(fortran_doublecomplex) + N*sizeof(fortran_int)); if (!mem_buff) goto error; a = mem_buff; b = a + N*N*sizeof(fortran_doublecomplex); ipiv = b + N*NRHS*sizeof(fortran_doublecomplex); params->A = a; params->B = b; params->IPIV = (fortran_int*)ipiv; params->N = N; params->NRHS = NRHS; params->LDA = N; params->LDB = N; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_zgesv(GESV_PARAMS_t *params) { /* memory block base is in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_zgesv(GESV_PARAMS_t *params) { fortran_int rv; LAPACK(zgesv)(¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->IPIV, params->B, ¶ms->LDB, &rv); return rv; } static void CDOUBLE_solve(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n, nrhs; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; nrhs = (fortran_int)dimensions[1]; if (init_zgesv(¶ms, n, nrhs)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_CDOUBLE_matrix(params.A, args[0], &a_in); linearize_CDOUBLE_matrix(params.B, args[1], &b_in); not_ok =call_zgesv(¶ms); if (!not_ok) { delinearize_CDOUBLE_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_CDOUBLE_matrix(args[2], &r_out); } END_OUTER_LOOP release_zgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CDOUBLE_solve1(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; if (init_zgesv(¶ms, n, 1)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, 1, n, 1, steps[2]); init_linearize_data(&r_out, 1, n, 1, steps[3]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_CDOUBLE_matrix(params.A, args[0], &a_in); linearize_CDOUBLE_matrix(params.B, args[1], &b_in); not_ok = call_zgesv(¶ms); if (!not_ok) { delinearize_CDOUBLE_matrix(args[2], params.B, &r_out); } else { error_occurred = 1; nan_CDOUBLE_matrix(args[2], &r_out); } END_OUTER_LOOP release_zgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CDOUBLE_inv(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { GESV_PARAMS_t params; fortran_int n; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_2 n = (fortran_int)dimensions[0]; if (init_zgesv(¶ms, n, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_CDOUBLE_matrix(params.A, args[0], &a_in); identity_CDOUBLE_matrix(params.B, n); not_ok = call_zgesv(¶ms); if (!not_ok) { delinearize_CDOUBLE_matrix(args[1], params.B, &r_out); } else { error_occurred = 1; nan_CDOUBLE_matrix(args[1], &r_out); } END_OUTER_LOOP release_zgesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } /* -------------------------------------------------------------------------- */ /* Cholesky decomposition */ typedef struct potr_params_struct { void *A; fortran_int N; fortran_int LDA; char UPLO; } POTR_PARAMS_t; #line 1756 static inline int init_spotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *a; mem_buff = malloc(N*N*sizeof(fortran_real)); if (!mem_buff) goto error; a = mem_buff; params->A = a; params->N = N; params->LDA = N; params->UPLO = UPLO; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_spotrf(POTR_PARAMS_t *params) { /* memory block base in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_spotrf(POTR_PARAMS_t *params) { fortran_int rv; LAPACK(spotrf)(¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, &rv); return rv; } static void FLOAT_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) { POTR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_2 assert(uplo == 'L'); n = (fortran_int)dimensions[0]; if (init_spotrf(¶ms, uplo, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_FLOAT_matrix(params.A, args[0], &a_in); not_ok = call_spotrf(¶ms); if (!not_ok) { triu_FLOAT_matrix(params.A, params.N); delinearize_FLOAT_matrix(args[1], params.A, &r_out); } else { error_occurred = 1; nan_FLOAT_matrix(args[1], &r_out); } END_OUTER_LOOP release_spotrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void FLOAT_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_cholesky('L', args, dimensions, steps); } #line 1756 static inline int init_dpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *a; mem_buff = malloc(N*N*sizeof(fortran_doublereal)); if (!mem_buff) goto error; a = mem_buff; params->A = a; params->N = N; params->LDA = N; params->UPLO = UPLO; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_dpotrf(POTR_PARAMS_t *params) { /* memory block base in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_dpotrf(POTR_PARAMS_t *params) { fortran_int rv; LAPACK(dpotrf)(¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, &rv); return rv; } static void DOUBLE_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) { POTR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_2 assert(uplo == 'L'); n = (fortran_int)dimensions[0]; if (init_dpotrf(¶ms, uplo, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_DOUBLE_matrix(params.A, args[0], &a_in); not_ok = call_dpotrf(¶ms); if (!not_ok) { triu_DOUBLE_matrix(params.A, params.N); delinearize_DOUBLE_matrix(args[1], params.A, &r_out); } else { error_occurred = 1; nan_DOUBLE_matrix(args[1], &r_out); } END_OUTER_LOOP release_dpotrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void DOUBLE_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_cholesky('L', args, dimensions, steps); } #line 1756 static inline int init_cpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *a; mem_buff = malloc(N*N*sizeof(fortran_complex)); if (!mem_buff) goto error; a = mem_buff; params->A = a; params->N = N; params->LDA = N; params->UPLO = UPLO; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_cpotrf(POTR_PARAMS_t *params) { /* memory block base in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_cpotrf(POTR_PARAMS_t *params) { fortran_int rv; LAPACK(cpotrf)(¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, &rv); return rv; } static void CFLOAT_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) { POTR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_2 assert(uplo == 'L'); n = (fortran_int)dimensions[0]; if (init_cpotrf(¶ms, uplo, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_CFLOAT_matrix(params.A, args[0], &a_in); not_ok = call_cpotrf(¶ms); if (!not_ok) { triu_CFLOAT_matrix(params.A, params.N); delinearize_CFLOAT_matrix(args[1], params.A, &r_out); } else { error_occurred = 1; nan_CFLOAT_matrix(args[1], &r_out); } END_OUTER_LOOP release_cpotrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CFLOAT_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CFLOAT_cholesky('L', args, dimensions, steps); } #line 1756 static inline int init_zpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *a; mem_buff = malloc(N*N*sizeof(fortran_doublecomplex)); if (!mem_buff) goto error; a = mem_buff; params->A = a; params->N = N; params->LDA = N; params->UPLO = UPLO; return 1; error: free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline void release_zpotrf(POTR_PARAMS_t *params) { /* memory block base in A */ free(params->A); memset(params, 0, sizeof(*params)); } static inline fortran_int call_zpotrf(POTR_PARAMS_t *params) { fortran_int rv; LAPACK(zpotrf)(¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, &rv); return rv; } static void CDOUBLE_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) { POTR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_2 assert(uplo == 'L'); n = (fortran_int)dimensions[0]; if (init_zpotrf(¶ms, uplo, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; linearize_CDOUBLE_matrix(params.A, args[0], &a_in); not_ok = call_zpotrf(¶ms); if (!not_ok) { triu_CDOUBLE_matrix(params.A, params.N); delinearize_CDOUBLE_matrix(args[1], params.A, &r_out); } else { error_occurred = 1; nan_CDOUBLE_matrix(args[1], &r_out); } END_OUTER_LOOP release_zpotrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } static void CDOUBLE_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_cholesky('L', args, dimensions, steps); } /* -------------------------------------------------------------------------- */ /* eig family */ typedef struct geev_params_struct { void *A; void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ void *WI; void *VLR; /* REAL VL buffers for _geev where _ is s, d */ void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */ void *WORK; void *W; /* final w */ void *VL; /* final vl */ void *VR; /* final vr */ fortran_int N; fortran_int LDA; fortran_int LDVL; fortran_int LDVR; fortran_int LWORK; char JOBVL; char JOBVR; } GEEV_PARAMS_t; static inline void dump_geev_params(const char *name, GEEV_PARAMS_t* params) { TRACE_TXT("\n%s\n" "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %p\n"\ "\t%10s: %d\n"\ "\t%10s: %d\n"\ "\t%10s: %d\n"\ "\t%10s: %d\n"\ "\t%10s: %d\n"\ "\t%10s: %c\n"\ "\t%10s: %c\n", name, "A", params->A, "WR", params->WR, "WI", params->WI, "VLR", params->VLR, "VRR", params->VRR, "WORK", params->WORK, "W", params->W, "VL", params->VL, "VR", params->VR, "N", (int)params->N, "LDA", (int)params->LDA, "LDVL", (int)params->LDVL, "LDVR", (int)params->LDVR, "LWORK", (int)params->LWORK, "JOBVL", params->JOBVL, "JOBVR", params->JOBVR); } #line 1921 static inline int init_sgeev(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) { npy_uint8 *mem_buff=NULL; npy_uint8 *mem_buff2=NULL; npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; size_t a_size = n*n*sizeof(float); size_t wr_size = n*sizeof(float); size_t wi_size = n*sizeof(float); size_t vlr_size = jobvl=='V' ? n*n*sizeof(float) : 0; size_t vrr_size = jobvr=='V' ? n*n*sizeof(float) : 0; size_t w_size = wr_size*2; size_t vl_size = vlr_size*2; size_t vr_size = vrr_size*2; size_t work_count = 0; float work_size_query; fortran_int do_size_query = -1; fortran_int rv; /* allocate data for known sizes (all but work) */ mem_buff = malloc(a_size + wr_size + wi_size + vlr_size + vrr_size + w_size + vl_size + vr_size); if (!mem_buff) goto error; a = mem_buff; wr = a + a_size; wi = wr + wr_size; vlr = wi + wi_size; vrr = vlr + vlr_size; w = vrr + vrr_size; vl = w + w_size; vr = vl + vl_size; LAPACK(sgeev)(&jobvl, &jobvr, &n, (void *)a, &n, (void *)wr, (void *)wi, (void *)vl, &n, (void *)vr, &n, &work_size_query, &do_size_query, &rv); if (0 != rv) goto error; work_count = (size_t)work_size_query; mem_buff2 = malloc(work_count*sizeof(float)); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->WR = wr; params->WI = wi; params->VLR = vlr; params->VRR = vrr; params->WORK = work; params->W = w; params->VL = vl; params->VR = vr; params->N = n; params->LDA = n; params->LDVL = n; params->LDVR = n; params->LWORK = (fortran_int)work_count; params->JOBVL = jobvl; params->JOBVR = jobvr; return 1; error: free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_sgeev(GEEV_PARAMS_t* params) { fortran_int rv; LAPACK(sgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->WR, params->WI, params->VLR, ¶ms->LDVL, params->VRR, ¶ms->LDVR, params->WORK, ¶ms->LWORK, &rv); return rv; } static inline void mk_FLOAT_complex_array_from_real(COMPLEX_t *c, const float *re, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { c[iter].array[0] = re[iter]; c[iter].array[1] = 0.0f; } } static inline void mk_FLOAT_complex_array(COMPLEX_t *c, const float *re, const float *im, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { c[iter].array[0] = re[iter]; c[iter].array[1] = im[iter]; } } static inline void mk_FLOAT_complex_array_conjugate_pair(COMPLEX_t *c, const float *r, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { float re = r[iter]; float im = r[iter+n]; c[iter].array[0] = re; c[iter].array[1] = im; c[iter+n].array[0] = re; c[iter+n].array[1] = -im; } } /* * make the complex eigenvectors from the real array produced by sgeev/zgeev. * c is the array where the results will be left. * r is the source array of reals produced by sgeev/zgeev * i is the eigenvalue imaginary part produced by sgeev/zgeev * n is so that the order of the matrix is n by n */ static inline void mk_sgeev_complex_eigenvectors(COMPLEX_t *c, const float *r, const float *i, size_t n) { size_t iter = 0; while (iter < n) { if (i[iter] == 0.0f) { /* eigenvalue was real, eigenvectors as well... */ mk_FLOAT_complex_array_from_real(c, r, n); c += n; r += n; iter ++; } else { /* eigenvalue was complex, generate a pair of eigenvectors */ mk_FLOAT_complex_array_conjugate_pair(c, r, n); c += 2*n; r += 2*n; iter += 2; } } } static inline void process_sgeev_results(GEEV_PARAMS_t *params) { /* REAL versions of geev need the results to be translated * into complex versions. This is the way to deal with imaginary * results. In our gufuncs we will always return complex arrays! */ mk_FLOAT_complex_array(params->W, params->WR, params->WI, params->N); /* handle the eigenvectors */ if ('V' == params->JOBVL) { mk_sgeev_complex_eigenvectors(params->VL, params->VLR, params->WI, params->N); } if ('V' == params->JOBVR) { mk_sgeev_complex_eigenvectors(params->VR, params->VRR, params->WI, params->N); } } #line 1921 static inline int init_dgeev(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) { npy_uint8 *mem_buff=NULL; npy_uint8 *mem_buff2=NULL; npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; size_t a_size = n*n*sizeof(double); size_t wr_size = n*sizeof(double); size_t wi_size = n*sizeof(double); size_t vlr_size = jobvl=='V' ? n*n*sizeof(double) : 0; size_t vrr_size = jobvr=='V' ? n*n*sizeof(double) : 0; size_t w_size = wr_size*2; size_t vl_size = vlr_size*2; size_t vr_size = vrr_size*2; size_t work_count = 0; double work_size_query; fortran_int do_size_query = -1; fortran_int rv; /* allocate data for known sizes (all but work) */ mem_buff = malloc(a_size + wr_size + wi_size + vlr_size + vrr_size + w_size + vl_size + vr_size); if (!mem_buff) goto error; a = mem_buff; wr = a + a_size; wi = wr + wr_size; vlr = wi + wi_size; vrr = vlr + vlr_size; w = vrr + vrr_size; vl = w + w_size; vr = vl + vl_size; LAPACK(dgeev)(&jobvl, &jobvr, &n, (void *)a, &n, (void *)wr, (void *)wi, (void *)vl, &n, (void *)vr, &n, &work_size_query, &do_size_query, &rv); if (0 != rv) goto error; work_count = (size_t)work_size_query; mem_buff2 = malloc(work_count*sizeof(double)); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->WR = wr; params->WI = wi; params->VLR = vlr; params->VRR = vrr; params->WORK = work; params->W = w; params->VL = vl; params->VR = vr; params->N = n; params->LDA = n; params->LDVL = n; params->LDVR = n; params->LWORK = (fortran_int)work_count; params->JOBVL = jobvl; params->JOBVR = jobvr; return 1; error: free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_dgeev(GEEV_PARAMS_t* params) { fortran_int rv; LAPACK(dgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->WR, params->WI, params->VLR, ¶ms->LDVL, params->VRR, ¶ms->LDVR, params->WORK, ¶ms->LWORK, &rv); return rv; } static inline void mk_DOUBLE_complex_array_from_real(DOUBLECOMPLEX_t *c, const double *re, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { c[iter].array[0] = re[iter]; c[iter].array[1] = 0.0; } } static inline void mk_DOUBLE_complex_array(DOUBLECOMPLEX_t *c, const double *re, const double *im, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { c[iter].array[0] = re[iter]; c[iter].array[1] = im[iter]; } } static inline void mk_DOUBLE_complex_array_conjugate_pair(DOUBLECOMPLEX_t *c, const double *r, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { double re = r[iter]; double im = r[iter+n]; c[iter].array[0] = re; c[iter].array[1] = im; c[iter+n].array[0] = re; c[iter+n].array[1] = -im; } } /* * make the complex eigenvectors from the real array produced by sgeev/zgeev. * c is the array where the results will be left. * r is the source array of reals produced by sgeev/zgeev * i is the eigenvalue imaginary part produced by sgeev/zgeev * n is so that the order of the matrix is n by n */ static inline void mk_dgeev_complex_eigenvectors(DOUBLECOMPLEX_t *c, const double *r, const double *i, size_t n) { size_t iter = 0; while (iter < n) { if (i[iter] == 0.0) { /* eigenvalue was real, eigenvectors as well... */ mk_DOUBLE_complex_array_from_real(c, r, n); c += n; r += n; iter ++; } else { /* eigenvalue was complex, generate a pair of eigenvectors */ mk_DOUBLE_complex_array_conjugate_pair(c, r, n); c += 2*n; r += 2*n; iter += 2; } } } static inline void process_dgeev_results(GEEV_PARAMS_t *params) { /* REAL versions of geev need the results to be translated * into complex versions. This is the way to deal with imaginary * results. In our gufuncs we will always return complex arrays! */ mk_DOUBLE_complex_array(params->W, params->WR, params->WI, params->N); /* handle the eigenvectors */ if ('V' == params->JOBVL) { mk_dgeev_complex_eigenvectors(params->VL, params->VLR, params->WI, params->N); } if ('V' == params->JOBVR) { mk_dgeev_complex_eigenvectors(params->VR, params->VRR, params->WI, params->N); } } #line 2113 static inline int init_cgeev(GEEV_PARAMS_t* params, char jobvl, char jobvr, fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *w, *vl, *vr, *work, *rwork; size_t a_size = n*n*sizeof(fortran_complex); size_t w_size = n*sizeof(fortran_complex); size_t vl_size = jobvl=='V'? n*n*sizeof(fortran_complex) : 0; size_t vr_size = jobvr=='V'? n*n*sizeof(fortran_complex) : 0; size_t rwork_size = 2*n*sizeof(float); size_t work_count = 0; COMPLEX_t work_size_query; fortran_int do_size_query = -1; fortran_int rv; size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; mem_buff = malloc(total_size); if (!mem_buff) goto error; a = mem_buff; w = a + a_size; vl = w + w_size; vr = vl + vl_size; rwork = vr + vr_size; LAPACK(cgeev)(&jobvl, &jobvr, &n, (void *)a, &n, (void *)w, (void *)vl, &n, (void *)vr, &n, (void *)&work_size_query, &do_size_query, (void *)rwork, &rv); if (0 != rv) goto error; work_count = (size_t) work_size_query.array[0]; mem_buff2 = malloc(work_count*sizeof(fortran_complex)); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->WR = rwork; params->WI = NULL; params->VLR = NULL; params->VRR = NULL; params->VL = vl; params->VR = vr; params->WORK = work; params->W = w; params->N = n; params->LDA = n; params->LDVL = n; params->LDVR = n; params->LWORK = (fortran_int)work_count; params->JOBVL = jobvl; params->JOBVR = jobvr; return 1; error: free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_cgeev(GEEV_PARAMS_t* params) { fortran_int rv; LAPACK(cgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->W, params->VL, ¶ms->LDVL, params->VR, ¶ms->LDVR, params->WORK, ¶ms->LWORK, params->WR, /* actually RWORK */ &rv); return rv; } static inline void process_cgeev_results(GEEV_PARAMS_t *NPY_UNUSED(params)) { /* nothing to do here, complex versions are ready to copy out */ } #line 2113 static inline int init_zgeev(GEEV_PARAMS_t* params, char jobvl, char jobvr, fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *w, *vl, *vr, *work, *rwork; size_t a_size = n*n*sizeof(fortran_doublecomplex); size_t w_size = n*sizeof(fortran_doublecomplex); size_t vl_size = jobvl=='V'? n*n*sizeof(fortran_doublecomplex) : 0; size_t vr_size = jobvr=='V'? n*n*sizeof(fortran_doublecomplex) : 0; size_t rwork_size = 2*n*sizeof(double); size_t work_count = 0; DOUBLECOMPLEX_t work_size_query; fortran_int do_size_query = -1; fortran_int rv; size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; mem_buff = malloc(total_size); if (!mem_buff) goto error; a = mem_buff; w = a + a_size; vl = w + w_size; vr = vl + vl_size; rwork = vr + vr_size; LAPACK(zgeev)(&jobvl, &jobvr, &n, (void *)a, &n, (void *)w, (void *)vl, &n, (void *)vr, &n, (void *)&work_size_query, &do_size_query, (void *)rwork, &rv); if (0 != rv) goto error; work_count = (size_t) work_size_query.array[0]; mem_buff2 = malloc(work_count*sizeof(fortran_doublecomplex)); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->WR = rwork; params->WI = NULL; params->VLR = NULL; params->VRR = NULL; params->VL = vl; params->VR = vr; params->WORK = work; params->W = w; params->N = n; params->LDA = n; params->LDVL = n; params->LDVR = n; params->LWORK = (fortran_int)work_count; params->JOBVL = jobvl; params->JOBVR = jobvr; return 1; error: free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_zgeev(GEEV_PARAMS_t* params) { fortran_int rv; LAPACK(zgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->W, params->VL, ¶ms->LDVL, params->VR, ¶ms->LDVR, params->WORK, ¶ms->LWORK, params->WR, /* actually RWORK */ &rv); return rv; } static inline void process_zgeev_results(GEEV_PARAMS_t *NPY_UNUSED(params)) { /* nothing to do here, complex versions are ready to copy out */ } #line 2216 static inline void release_sgeev(GEEV_PARAMS_t *params) { free(params->WORK); free(params->A); memset(params, 0, sizeof(*params)); } static inline void FLOAT_eig_wrapper(char JOBVL, char JOBVR, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = 2; int error_occurred = get_fp_invalid_and_clear(); GEEV_PARAMS_t geev_params; assert(JOBVL == 'N'); STACK_TRACE; op_count += 'V'==JOBVL?1:0; op_count += 'V'==JOBVR?1:0; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_sgeev(&geev_params, JOBVL, JOBVR, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t a_in; LINEARIZE_DATA_t w_out; LINEARIZE_DATA_t vl_out; LINEARIZE_DATA_t vr_out; init_linearize_data(&a_in, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; init_linearize_data(&w_out, 1, geev_params.N, 0, steps[0]); steps += 1; if ('V' == geev_params.JOBVL) { init_linearize_data(&vl_out, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; } if ('V' == geev_params.JOBVR) { init_linearize_data(&vr_out, geev_params.N, geev_params.N, steps[1], steps[0]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; char **arg_iter = args; /* copy the matrix in */ linearize_FLOAT_matrix(geev_params.A, *arg_iter++, &a_in); not_ok = call_sgeev(&geev_params); if (!not_ok) { process_sgeev_results(&geev_params); delinearize_CFLOAT_matrix(*arg_iter++, geev_params.W, &w_out); if ('V' == geev_params.JOBVL) delinearize_CFLOAT_matrix(*arg_iter++, geev_params.VL, &vl_out); if ('V' == geev_params.JOBVR) delinearize_CFLOAT_matrix(*arg_iter++, geev_params.VR, &vr_out); } else { /* geev failed */ error_occurred = 1; nan_CFLOAT_matrix(*arg_iter++, &w_out); if ('V' == geev_params.JOBVL) nan_CFLOAT_matrix(*arg_iter++, &vl_out); if ('V' == geev_params.JOBVR) nan_CFLOAT_matrix(*arg_iter++, &vr_out); } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_sgeev(&geev_params); } set_fp_invalid_or_clear(error_occurred); } static void FLOAT_eig(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_eig_wrapper('N', 'V', args, dimensions, steps); } static void FLOAT_eigvals(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_eig_wrapper('N', 'N', args, dimensions, steps); } #line 2216 static inline void release_dgeev(GEEV_PARAMS_t *params) { free(params->WORK); free(params->A); memset(params, 0, sizeof(*params)); } static inline void DOUBLE_eig_wrapper(char JOBVL, char JOBVR, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = 2; int error_occurred = get_fp_invalid_and_clear(); GEEV_PARAMS_t geev_params; assert(JOBVL == 'N'); STACK_TRACE; op_count += 'V'==JOBVL?1:0; op_count += 'V'==JOBVR?1:0; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_dgeev(&geev_params, JOBVL, JOBVR, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t a_in; LINEARIZE_DATA_t w_out; LINEARIZE_DATA_t vl_out; LINEARIZE_DATA_t vr_out; init_linearize_data(&a_in, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; init_linearize_data(&w_out, 1, geev_params.N, 0, steps[0]); steps += 1; if ('V' == geev_params.JOBVL) { init_linearize_data(&vl_out, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; } if ('V' == geev_params.JOBVR) { init_linearize_data(&vr_out, geev_params.N, geev_params.N, steps[1], steps[0]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; char **arg_iter = args; /* copy the matrix in */ linearize_DOUBLE_matrix(geev_params.A, *arg_iter++, &a_in); not_ok = call_dgeev(&geev_params); if (!not_ok) { process_dgeev_results(&geev_params); delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.W, &w_out); if ('V' == geev_params.JOBVL) delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.VL, &vl_out); if ('V' == geev_params.JOBVR) delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.VR, &vr_out); } else { /* geev failed */ error_occurred = 1; nan_CDOUBLE_matrix(*arg_iter++, &w_out); if ('V' == geev_params.JOBVL) nan_CDOUBLE_matrix(*arg_iter++, &vl_out); if ('V' == geev_params.JOBVR) nan_CDOUBLE_matrix(*arg_iter++, &vr_out); } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_dgeev(&geev_params); } set_fp_invalid_or_clear(error_occurred); } static void DOUBLE_eig(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_eig_wrapper('N', 'V', args, dimensions, steps); } static void DOUBLE_eigvals(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_eig_wrapper('N', 'N', args, dimensions, steps); } #line 2216 static inline void release_zgeev(GEEV_PARAMS_t *params) { free(params->WORK); free(params->A); memset(params, 0, sizeof(*params)); } static inline void CDOUBLE_eig_wrapper(char JOBVL, char JOBVR, char**args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = 2; int error_occurred = get_fp_invalid_and_clear(); GEEV_PARAMS_t geev_params; assert(JOBVL == 'N'); STACK_TRACE; op_count += 'V'==JOBVL?1:0; op_count += 'V'==JOBVR?1:0; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_zgeev(&geev_params, JOBVL, JOBVR, (fortran_int)dimensions[0])) { LINEARIZE_DATA_t a_in; LINEARIZE_DATA_t w_out; LINEARIZE_DATA_t vl_out; LINEARIZE_DATA_t vr_out; init_linearize_data(&a_in, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; init_linearize_data(&w_out, 1, geev_params.N, 0, steps[0]); steps += 1; if ('V' == geev_params.JOBVL) { init_linearize_data(&vl_out, geev_params.N, geev_params.N, steps[1], steps[0]); steps += 2; } if ('V' == geev_params.JOBVR) { init_linearize_data(&vr_out, geev_params.N, geev_params.N, steps[1], steps[0]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; char **arg_iter = args; /* copy the matrix in */ linearize_CDOUBLE_matrix(geev_params.A, *arg_iter++, &a_in); not_ok = call_zgeev(&geev_params); if (!not_ok) { process_zgeev_results(&geev_params); delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.W, &w_out); if ('V' == geev_params.JOBVL) delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.VL, &vl_out); if ('V' == geev_params.JOBVR) delinearize_CDOUBLE_matrix(*arg_iter++, geev_params.VR, &vr_out); } else { /* geev failed */ error_occurred = 1; nan_CDOUBLE_matrix(*arg_iter++, &w_out); if ('V' == geev_params.JOBVL) nan_CDOUBLE_matrix(*arg_iter++, &vl_out); if ('V' == geev_params.JOBVR) nan_CDOUBLE_matrix(*arg_iter++, &vr_out); } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_zgeev(&geev_params); } set_fp_invalid_or_clear(error_occurred); } static void CDOUBLE_eig(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_eig_wrapper('N', 'V', args, dimensions, steps); } static void CDOUBLE_eigvals(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_eig_wrapper('N', 'N', args, dimensions, steps); } /* -------------------------------------------------------------------------- */ /* singular value decomposition */ typedef struct gessd_params_struct { void *A; void *S; void *U; void *VT; void *WORK; void *RWORK; void *IWORK; fortran_int M; fortran_int N; fortran_int LDA; fortran_int LDU; fortran_int LDVT; fortran_int LWORK; char JOBZ; } GESDD_PARAMS_t; static inline void dump_gesdd_params(const char *name, GESDD_PARAMS_t *params) { TRACE_TXT("\n%s:\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %15c'%c'\n", name, "A", params->A, "S", params->S, "U", params->U, "VT", params->VT, "WORK", params->WORK, "RWORK", params->RWORK, "IWORK", params->IWORK, "M", (int)params->M, "N", (int)params->N, "LDA", (int)params->LDA, "LDU", (int)params->LDU, "LDVT", (int)params->LDVT, "LWORK", (int)params->LWORK, "JOBZ", ' ',params->JOBZ); } static inline int compute_urows_vtcolumns(char jobz, fortran_int m, fortran_int n, fortran_int *urows, fortran_int *vtcolumns) { fortran_int min_m_n = mM = m; params->N = n; params->A = a; params->S = s; params->U = u; params->VT = vt; params->WORK = work; params->RWORK = NULL; params->IWORK = iwork; params->M = m; params->N = n; params->LDA = m; params->LDU = m; params->LDVT = vt_column_count; params->LWORK = work_count; params->JOBZ = jobz; return 1; error: TRACE_TXT("%s failed init\n", __FUNCTION__); free(mem_buff); free(mem_buff2); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_sgesdd(GESDD_PARAMS_t *params) { fortran_int rv; LAPACK(sgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, params->VT, ¶ms->LDVT, params->WORK, ¶ms->LWORK, params->IWORK, &rv); return rv; } #line 2439 static inline int init_dgesdd(GESDD_PARAMS_t *params, char jobz, fortran_int m, fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *s, *u, *vt, *work, *iwork; size_t a_size = (size_t)m*(size_t)n*sizeof(fortran_doublereal); fortran_int min_m_n = mM = m; params->N = n; params->A = a; params->S = s; params->U = u; params->VT = vt; params->WORK = work; params->RWORK = NULL; params->IWORK = iwork; params->M = m; params->N = n; params->LDA = m; params->LDU = m; params->LDVT = vt_column_count; params->LWORK = work_count; params->JOBZ = jobz; return 1; error: TRACE_TXT("%s failed init\n", __FUNCTION__); free(mem_buff); free(mem_buff2); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_dgesdd(GESDD_PARAMS_t *params) { fortran_int rv; LAPACK(dgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, params->VT, ¶ms->LDVT, params->WORK, ¶ms->LWORK, params->IWORK, &rv); return rv; } #line 2550 static inline int init_cgesdd(GESDD_PARAMS_t *params, char jobz, fortran_int m, fortran_int n) { npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; fortran_int u_row_count, vt_column_count, work_count; fortran_int min_m_n = marray[0]; work_size = (size_t)work_count * sizeof(fortran_complex); } mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->S = s; params->U = u; params->VT = vt; params->WORK = work; params->RWORK = rwork; params->IWORK = iwork; params->M = m; params->N = n; params->LDA = m; params->LDU = m; params->LDVT = vt_column_count; params->LWORK = work_count; params->JOBZ = jobz; return 1; error: TRACE_TXT("%s failed init\n", __FUNCTION__); free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_cgesdd(GESDD_PARAMS_t *params) { fortran_int rv; LAPACK(cgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, params->VT, ¶ms->LDVT, params->WORK, ¶ms->LWORK, params->RWORK, params->IWORK, &rv); return rv; } #line 2550 static inline int init_zgesdd(GESDD_PARAMS_t *params, char jobz, fortran_int m, fortran_int n) { npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; fortran_int u_row_count, vt_column_count, work_count; fortran_int min_m_n = marray[0]; work_size = (size_t)work_count * sizeof(fortran_doublecomplex); } mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; params->A = a; params->S = s; params->U = u; params->VT = vt; params->WORK = work; params->RWORK = rwork; params->IWORK = iwork; params->M = m; params->N = n; params->LDA = m; params->LDU = m; params->LDVT = vt_column_count; params->LWORK = work_count; params->JOBZ = jobz; return 1; error: TRACE_TXT("%s failed init\n", __FUNCTION__); free(mem_buff2); free(mem_buff); memset(params, 0, sizeof(*params)); return 0; } static inline fortran_int call_zgesdd(GESDD_PARAMS_t *params) { fortran_int rv; LAPACK(zgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, params->VT, ¶ms->LDVT, params->WORK, ¶ms->LWORK, params->RWORK, params->IWORK, &rv); return rv; } #line 2665 static inline void release_sgesdd(GESDD_PARAMS_t* params) { /* A and WORK contain allocated blocks */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void FLOAT_svd_wrapper(char JOBZ, char **args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; int error_occurred = get_fp_invalid_and_clear(); size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:4; GESDD_PARAMS_t params; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_sgesdd(¶ms, JOBZ, (fortran_int)dimensions[0], (fortran_int)dimensions[1])) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); if ('N' == params.JOBZ) { /* only the singular values are wanted */ fortran_int min_m_n = params.M < params.N? params.M : params.N; init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); } else { fortran_int u_columns, v_rows; fortran_int min_m_n = params.M < params.N? params.M : params.N; if ('S' == params.JOBZ) { u_columns = min_m_n; v_rows = min_m_n; } else { u_columns = params.M; v_rows = params.N; } init_linearize_data(&u_out, u_columns, params.M, steps[3], steps[2]); init_linearize_data(&s_out, 1, min_m_n, 0, steps[4]); init_linearize_data(&v_out, params.N, v_rows, steps[6], steps[5]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_FLOAT_matrix(params.A, args[0], &a_in); not_ok = call_sgesdd(¶ms); if (!not_ok) { if ('N' == params.JOBZ) { delinearize_FLOAT_matrix(args[1], params.S, &s_out); } else { delinearize_FLOAT_matrix(args[1], params.U, &u_out); delinearize_FLOAT_matrix(args[2], params.S, &s_out); delinearize_FLOAT_matrix(args[3], params.VT, &v_out); } } else { error_occurred = 1; if ('N' == params.JOBZ) { nan_FLOAT_matrix(args[1], &s_out); } else { nan_FLOAT_matrix(args[1], &u_out); nan_FLOAT_matrix(args[2], &s_out); nan_FLOAT_matrix(args[3], &v_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_sgesdd(¶ms); } set_fp_invalid_or_clear(error_occurred); } /**end repeat*/ /* svd gufunc entry points */ /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# */ static void FLOAT_svd_N(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_svd_wrapper('N', args, dimensions, steps); } static void FLOAT_svd_S(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_svd_wrapper('S', args, dimensions, steps); } static void FLOAT_svd_A(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { FLOAT_svd_wrapper('A', args, dimensions, steps); } #line 2665 static inline void release_dgesdd(GESDD_PARAMS_t* params) { /* A and WORK contain allocated blocks */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void DOUBLE_svd_wrapper(char JOBZ, char **args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; int error_occurred = get_fp_invalid_and_clear(); size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:4; GESDD_PARAMS_t params; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_dgesdd(¶ms, JOBZ, (fortran_int)dimensions[0], (fortran_int)dimensions[1])) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); if ('N' == params.JOBZ) { /* only the singular values are wanted */ fortran_int min_m_n = params.M < params.N? params.M : params.N; init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); } else { fortran_int u_columns, v_rows; fortran_int min_m_n = params.M < params.N? params.M : params.N; if ('S' == params.JOBZ) { u_columns = min_m_n; v_rows = min_m_n; } else { u_columns = params.M; v_rows = params.N; } init_linearize_data(&u_out, u_columns, params.M, steps[3], steps[2]); init_linearize_data(&s_out, 1, min_m_n, 0, steps[4]); init_linearize_data(&v_out, params.N, v_rows, steps[6], steps[5]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_DOUBLE_matrix(params.A, args[0], &a_in); not_ok = call_dgesdd(¶ms); if (!not_ok) { if ('N' == params.JOBZ) { delinearize_DOUBLE_matrix(args[1], params.S, &s_out); } else { delinearize_DOUBLE_matrix(args[1], params.U, &u_out); delinearize_DOUBLE_matrix(args[2], params.S, &s_out); delinearize_DOUBLE_matrix(args[3], params.VT, &v_out); } } else { error_occurred = 1; if ('N' == params.JOBZ) { nan_DOUBLE_matrix(args[1], &s_out); } else { nan_DOUBLE_matrix(args[1], &u_out); nan_DOUBLE_matrix(args[2], &s_out); nan_DOUBLE_matrix(args[3], &v_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_dgesdd(¶ms); } set_fp_invalid_or_clear(error_occurred); } /**end repeat*/ /* svd gufunc entry points */ /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# */ static void DOUBLE_svd_N(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_svd_wrapper('N', args, dimensions, steps); } static void DOUBLE_svd_S(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_svd_wrapper('S', args, dimensions, steps); } static void DOUBLE_svd_A(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { DOUBLE_svd_wrapper('A', args, dimensions, steps); } #line 2665 static inline void release_cgesdd(GESDD_PARAMS_t* params) { /* A and WORK contain allocated blocks */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void CFLOAT_svd_wrapper(char JOBZ, char **args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; int error_occurred = get_fp_invalid_and_clear(); size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:4; GESDD_PARAMS_t params; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_cgesdd(¶ms, JOBZ, (fortran_int)dimensions[0], (fortran_int)dimensions[1])) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); if ('N' == params.JOBZ) { /* only the singular values are wanted */ fortran_int min_m_n = params.M < params.N? params.M : params.N; init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); } else { fortran_int u_columns, v_rows; fortran_int min_m_n = params.M < params.N? params.M : params.N; if ('S' == params.JOBZ) { u_columns = min_m_n; v_rows = min_m_n; } else { u_columns = params.M; v_rows = params.N; } init_linearize_data(&u_out, u_columns, params.M, steps[3], steps[2]); init_linearize_data(&s_out, 1, min_m_n, 0, steps[4]); init_linearize_data(&v_out, params.N, v_rows, steps[6], steps[5]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_CFLOAT_matrix(params.A, args[0], &a_in); not_ok = call_cgesdd(¶ms); if (!not_ok) { if ('N' == params.JOBZ) { delinearize_FLOAT_matrix(args[1], params.S, &s_out); } else { delinearize_CFLOAT_matrix(args[1], params.U, &u_out); delinearize_FLOAT_matrix(args[2], params.S, &s_out); delinearize_CFLOAT_matrix(args[3], params.VT, &v_out); } } else { error_occurred = 1; if ('N' == params.JOBZ) { nan_FLOAT_matrix(args[1], &s_out); } else { nan_CFLOAT_matrix(args[1], &u_out); nan_FLOAT_matrix(args[2], &s_out); nan_CFLOAT_matrix(args[3], &v_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_cgesdd(¶ms); } set_fp_invalid_or_clear(error_occurred); } /**end repeat*/ /* svd gufunc entry points */ /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# */ static void CFLOAT_svd_N(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CFLOAT_svd_wrapper('N', args, dimensions, steps); } static void CFLOAT_svd_S(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CFLOAT_svd_wrapper('S', args, dimensions, steps); } static void CFLOAT_svd_A(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CFLOAT_svd_wrapper('A', args, dimensions, steps); } #line 2665 static inline void release_zgesdd(GESDD_PARAMS_t* params) { /* A and WORK contain allocated blocks */ free(params->A); free(params->WORK); memset(params, 0, sizeof(*params)); } static inline void CDOUBLE_svd_wrapper(char JOBZ, char **args, npy_intp* dimensions, npy_intp* steps) { ptrdiff_t outer_steps[4]; int error_occurred = get_fp_invalid_and_clear(); size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:4; GESDD_PARAMS_t params; for (iter=0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; if (init_zgesdd(¶ms, JOBZ, (fortran_int)dimensions[0], (fortran_int)dimensions[1])) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); if ('N' == params.JOBZ) { /* only the singular values are wanted */ fortran_int min_m_n = params.M < params.N? params.M : params.N; init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); } else { fortran_int u_columns, v_rows; fortran_int min_m_n = params.M < params.N? params.M : params.N; if ('S' == params.JOBZ) { u_columns = min_m_n; v_rows = min_m_n; } else { u_columns = params.M; v_rows = params.N; } init_linearize_data(&u_out, u_columns, params.M, steps[3], steps[2]); init_linearize_data(&s_out, 1, min_m_n, 0, steps[4]); init_linearize_data(&v_out, params.N, v_rows, steps[6], steps[5]); } for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ linearize_CDOUBLE_matrix(params.A, args[0], &a_in); not_ok = call_zgesdd(¶ms); if (!not_ok) { if ('N' == params.JOBZ) { delinearize_DOUBLE_matrix(args[1], params.S, &s_out); } else { delinearize_CDOUBLE_matrix(args[1], params.U, &u_out); delinearize_DOUBLE_matrix(args[2], params.S, &s_out); delinearize_CDOUBLE_matrix(args[3], params.VT, &v_out); } } else { error_occurred = 1; if ('N' == params.JOBZ) { nan_DOUBLE_matrix(args[1], &s_out); } else { nan_CDOUBLE_matrix(args[1], &u_out); nan_DOUBLE_matrix(args[2], &s_out); nan_CDOUBLE_matrix(args[3], &v_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } release_zgesdd(¶ms); } set_fp_invalid_or_clear(error_occurred); } /**end repeat*/ /* svd gufunc entry points */ /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# */ static void CDOUBLE_svd_N(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_svd_wrapper('N', args, dimensions, steps); } static void CDOUBLE_svd_S(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_svd_wrapper('S', args, dimensions, steps); } static void CDOUBLE_svd_A(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { CDOUBLE_svd_wrapper('A', args, dimensions, steps); } #pragma GCC diagnostic pop /* -------------------------------------------------------------------------- */ /* gufunc registration */ static void *array_of_nulls[] = { (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL }; #define FUNC_ARRAY_NAME(NAME) NAME ## _funcs #define GUFUNC_FUNC_ARRAY_REAL(NAME) \ static PyUFuncGenericFunction \ FUNC_ARRAY_NAME(NAME)[] = { \ FLOAT_ ## NAME, \ DOUBLE_ ## NAME \ } #define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME) \ static PyUFuncGenericFunction \ FUNC_ARRAY_NAME(NAME)[] = { \ FLOAT_ ## NAME, \ DOUBLE_ ## NAME, \ CFLOAT_ ## NAME, \ CDOUBLE_ ## NAME \ } /* There are problems with eig in complex single precision. * That kernel is disabled */ #define GUFUNC_FUNC_ARRAY_EIG(NAME) \ static PyUFuncGenericFunction \ FUNC_ARRAY_NAME(NAME)[] = { \ FLOAT_ ## NAME, \ DOUBLE_ ## NAME, \ CDOUBLE_ ## NAME \ } GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); GUFUNC_FUNC_ARRAY_EIG(eig); GUFUNC_FUNC_ARRAY_EIG(eigvals); static char equal_2_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_CFLOAT, NPY_CDOUBLE, NPY_CDOUBLE }; static char equal_3_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE }; /* second result is logdet, that will always be a REAL */ static char slogdet_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE }; static char eigh_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE }; static char eighvals_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_FLOAT, NPY_CDOUBLE, NPY_DOUBLE }; static char eig_types[] = { NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE }; static char eigvals_types[] = { NPY_FLOAT, NPY_CFLOAT, NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE }; static char svd_1_1_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_FLOAT, NPY_CDOUBLE, NPY_DOUBLE }; static char svd_1_3_types[] = { NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE }; typedef struct gufunc_descriptor_struct { char *name; char *signature; char *doc; int ntypes; int nin; int nout; PyUFuncGenericFunction *funcs; char *types; } GUFUNC_DESCRIPTOR_t; GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { { "slogdet", "(m,m)->(),()", "slogdet on the last two dimensions and broadcast on the rest. \n"\ "Results in two arrays, one with sign and the other with log of the"\ " determinants. \n"\ " \"(m,m)->(),()\" \n", 4, 1, 2, FUNC_ARRAY_NAME(slogdet), slogdet_types }, { "det", "(m,m)->()", "det of the last two dimensions and broadcast on the rest. \n"\ " \"(m,m)->()\" \n", 4, 1, 1, FUNC_ARRAY_NAME(det), equal_2_types }, { "eigh_lo", "(m,m)->(m),(m,m)", "eigh on the last two dimension and broadcast to the rest, using"\ " lower triangle \n"\ "Results in a vector of eigenvalues and a matrix with the"\ "eigenvectors. \n"\ " \"(m,m)->(m),(m,m)\" \n", 4, 1, 2, FUNC_ARRAY_NAME(eighlo), eigh_types }, { "eigh_up", "(m,m)->(m),(m,m)", "eigh on the last two dimension and broadcast to the rest, using"\ " upper triangle. \n"\ "Results in a vector of eigenvalues and a matrix with the"\ " eigenvectors. \n"\ " \"(m,m)->(m),(m,m)\" \n", 4, 1, 2, FUNC_ARRAY_NAME(eighup), eigh_types }, { "eigvalsh_lo", "(m,m)->(m)", "eigh on the last two dimension and broadcast to the rest, using"\ " lower triangle. \n"\ "Results in a vector of eigenvalues and a matrix with the"\ "eigenvectors. \n"\ " \"(m,m)->(m)\" \n", 4, 1, 1, FUNC_ARRAY_NAME(eigvalshlo), eighvals_types }, { "eigvalsh_up", "(m,m)->(m)", "eigvalsh on the last two dimension and broadcast to the rest,"\ " using upper triangle. \n"\ "Results in a vector of eigenvalues and a matrix with the"\ "eigenvectors.\n"\ " \"(m,m)->(m)\" \n", 4, 1, 1, FUNC_ARRAY_NAME(eigvalshup), eighvals_types }, { "solve", "(m,m),(m,n)->(m,n)", "solve the system a x = b, on the last two dimensions, broadcast"\ " to the rest. \n"\ "Results in a matrices with the solutions. \n"\ " \"(m,m),(m,n)->(m,n)\" \n", 4, 2, 1, FUNC_ARRAY_NAME(solve), equal_3_types }, { "solve1", "(m,m),(m)->(m)", "solve the system a x = b, for b being a vector, broadcast in"\ " the outer dimensions. \n"\ "Results in vectors with the solutions. \n"\ " \"(m,m),(m)->(m)\" \n", 4,2,1, FUNC_ARRAY_NAME(solve1), equal_3_types }, { "inv", "(m,m)->(m,m)", "compute the inverse of the last two dimensions and broadcast"\ " to the rest. \n"\ "Results in the inverse matrices. \n"\ " \"(m,m)->(m,m)\" \n", 4,1,1, FUNC_ARRAY_NAME(inv), equal_2_types }, { "cholesky_lo", "(m,m)->(m,m)", "cholesky decomposition of hermitian positive-definite matrices. \n"\ "Broadcast to all outer dimensions. \n"\ " \"(m,m)->(m,m)\" \n", 4, 1, 1, FUNC_ARRAY_NAME(cholesky_lo), equal_2_types }, { "svd_m", "(m,n)->(m)", "svd when n>=m. ", 4, 1, 1, FUNC_ARRAY_NAME(svd_N), svd_1_1_types }, { "svd_n", "(m,n)->(n)", "svd when n<=m", 4, 1, 1, FUNC_ARRAY_NAME(svd_N), svd_1_1_types }, { "svd_m_s", "(m,n)->(m,m),(m),(m,n)", "svd when m>=n", 4, 1, 3, FUNC_ARRAY_NAME(svd_S), svd_1_3_types }, { "svd_n_s", "(m,n)->(m,n),(n),(n,n)", "svd when m>=n", 4, 1, 3, FUNC_ARRAY_NAME(svd_S), svd_1_3_types }, { "svd_m_f", "(m,n)->(m,m),(m),(n,n)", "svd when m>=n", 4, 1, 3, FUNC_ARRAY_NAME(svd_A), svd_1_3_types }, { "svd_n_f", "(m,n)->(m,m),(n),(n,n)", "svd when m>=n", 4, 1, 3, FUNC_ARRAY_NAME(svd_A), svd_1_3_types }, { "eig", "(m,m)->(m),(m,m)", "eig on the last two dimension and broadcast to the rest. \n"\ "Results in a vector with the eigenvalues and a matrix with the"\ " eigenvectors. \n"\ " \"(m,m)->(m),(m,m)\" \n", 3, 1, 2, FUNC_ARRAY_NAME(eig), eig_types }, { "eigvals", "(m,m)->(m)", "eigvals on the last two dimension and broadcast to the rest. \n"\ "Results in a vector of eigenvalues. \n"\ " \"(m,m)->(m),(m,m)\" \n", 3, 1, 1, FUNC_ARRAY_NAME(eigvals), eigvals_types }, }; static void addUfuncs(PyObject *dictionary) { PyObject *f; int i; const int gufunc_count = sizeof(gufunc_descriptors)/ sizeof(gufunc_descriptors[0]); for (i=0; i < gufunc_count; i++) { GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i]; f = PyUFunc_FromFuncAndDataAndSignature(d->funcs, array_of_nulls, d->types, d->ntypes, d->nin, d->nout, PyUFunc_None, d->name, d->doc, 0, d->signature); PyDict_SetItemString(dictionary, d->name, f); #if 0 dump_ufunc_object((PyUFuncObject*) f); #endif Py_DECREF(f); } } /* -------------------------------------------------------------------------- */ /* Module initialization stuff */ static PyMethodDef UMath_LinAlgMethods[] = { {NULL, NULL, 0, NULL} /* Sentinel */ }; #if defined(NPY_PY3K) static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, UMATH_LINALG_MODULE_NAME, NULL, -1, UMath_LinAlgMethods, NULL, NULL, NULL, NULL }; #endif #if defined(NPY_PY3K) #define RETVAL m PyObject *PyInit__umath_linalg(void) #else #define RETVAL PyMODINIT_FUNC init_umath_linalg(void) #endif { PyObject *m; PyObject *d; PyObject *version; init_constants(); #if defined(NPY_PY3K) m = PyModule_Create(&moduledef); #else m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods); #endif if (m == NULL) return RETVAL; import_array(); import_ufunc(); d = PyModule_GetDict(m); version = PyString_FromString(umath_linalg_version_string); PyDict_SetItemString(d, "__version__", version); Py_DECREF(version); /* Load the ufunc operators into the module's namespace */ addUfuncs(d); if (PyErr_Occurred()) { PyErr_SetString(PyExc_RuntimeError, "cannot load _umath_linalg module."); } return RETVAL; }