tmp
/
pip-install-ghxuqwgs
/numpy_78e94bf2b6094bf9a1f3d92042f9bf46
/build
/src.linux-x86_64-3.1
/numpy
/linalg
/umath_linalg.c
/* | |
***************************************************************************** | |
** This file was autogenerated from a template DO NOT EDIT!!!! ** | |
** Changes should be made to the original source (.src) file ** | |
***************************************************************************** | |
*/ | |
/* -*- c -*- */ | |
/* | |
***************************************************************************** | |
** INCLUDES ** | |
***************************************************************************** | |
*/ | |
static const char* umath_linalg_version_string = "0.1.4"; | |
/* | |
**************************************************************************** | |
* Debugging support * | |
**************************************************************************** | |
*/ | |
/*
 * Debug helper: capture the current call stack (at most 32 frames) and
 * write its symbolized form to file descriptor 1.
 */
void
dbg_stack_trace(void)
{
    void *trace[32];
    int depth;

    /* backtrace() takes and returns int, so the frame count is kept as an
       int instead of being round-tripped through size_t. */
    depth = backtrace(trace, (int)(sizeof(trace)/sizeof(trace[0])));
    backtrace_symbols_fd(trace, depth, 1);
}
/* | |
***************************************************************************** | |
* BLAS/LAPACK calling macros * | |
***************************************************************************** | |
*/ | |
/* Single-precision complex value: real part `r` followed by imaginary `i`. */
typedef struct {
    float r;
    float i;
} f2c_complex;

/* Double-precision complex value: real part `r` followed by imaginary `i`. */
typedef struct {
    double r;
    double i;
} f2c_doublecomplex;
/* typedef long int (*L_fp)(); */ | |
extern int | |
FNAME(sgeev)(char *jobvl, char *jobvr, int *n, | |
float a[], int *lda, float wr[], float wi[], | |
float vl[], int *ldvl, float vr[], int *ldvr, | |
float work[], int lwork[], | |
int *info); | |
extern int | |
FNAME(dgeev)(char *jobvl, char *jobvr, int *n, | |
double a[], int *lda, double wr[], double wi[], | |
double vl[], int *ldvl, double vr[], int *ldvr, | |
double work[], int lwork[], | |
int *info); | |
extern int | |
FNAME(cgeev)(char *jobvl, char *jobvr, int *n, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex w[], | |
f2c_doublecomplex vl[], int *ldvl, | |
f2c_doublecomplex vr[], int *ldvr, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], | |
int *info); | |
extern int | |
FNAME(zgeev)(char *jobvl, char *jobvr, int *n, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex w[], | |
f2c_doublecomplex vl[], int *ldvl, | |
f2c_doublecomplex vr[], int *ldvr, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], | |
int *info); | |
extern int | |
FNAME(ssyevd)(char *jobz, char *uplo, int *n, | |
float a[], int *lda, float w[], float work[], | |
int *lwork, int iwork[], int *liwork, | |
int *info); | |
extern int | |
FNAME(dsyevd)(char *jobz, char *uplo, int *n, | |
double a[], int *lda, double w[], double work[], | |
int *lwork, int iwork[], int *liwork, | |
int *info); | |
extern int | |
FNAME(cheevd)(char *jobz, char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
float w[], f2c_complex work[], | |
int *lwork, float rwork[], int *lrwork, int iwork[], | |
int *liwork, | |
int *info); | |
extern int | |
FNAME(zheevd)(char *jobz, char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
double w[], f2c_doublecomplex work[], | |
int *lwork, double rwork[], int *lrwork, int iwork[], | |
int *liwork, | |
int *info); | |
extern int | |
FNAME(dgelsd)(int *m, int *n, int *nrhs, | |
double a[], int *lda, double b[], int *ldb, | |
double s[], double *rcond, int *rank, | |
double work[], int *lwork, int iwork[], | |
int *info); | |
extern int | |
FNAME(zgelsd)(int *m, int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex b[], int *ldb, | |
double s[], double *rcond, int *rank, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], int iwork[], | |
int *info); | |
extern int | |
FNAME(sgesv)(int *n, int *nrhs, | |
float a[], int *lda, | |
int ipiv[], | |
float b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(dgesv)(int *n, int *nrhs, | |
double a[], int *lda, | |
int ipiv[], | |
double b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(cgesv)(int *n, int *nrhs, | |
f2c_complex a[], int *lda, | |
int ipiv[], | |
f2c_complex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(zgesv)(int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
int ipiv[], | |
f2c_doublecomplex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(sgetrf)(int *m, int *n, | |
float a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(dgetrf)(int *m, int *n, | |
double a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(cgetrf)(int *m, int *n, | |
f2c_complex a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(zgetrf)(int *m, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int ipiv[], | |
int *info); | |
extern int | |
FNAME(spotrf)(char *uplo, int *n, | |
float a[], int *lda, | |
int *info); | |
extern int | |
FNAME(dpotrf)(char *uplo, int *n, | |
double a[], int *lda, | |
int *info); | |
extern int | |
FNAME(cpotrf)(char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(zpotrf)(char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(sgesdd)(char *jobz, int *m, int *n, | |
float a[], int *lda, float s[], float u[], | |
int *ldu, float vt[], int *ldvt, float work[], | |
int *lwork, int iwork[], int *info); | |
extern int | |
FNAME(dgesdd)(char *jobz, int *m, int *n, | |
double a[], int *lda, double s[], double u[], | |
int *ldu, double vt[], int *ldvt, double work[], | |
int *lwork, int iwork[], int *info); | |
extern int | |
FNAME(cgesdd)(char *jobz, int *m, int *n, | |
f2c_complex a[], int *lda, | |
float s[], f2c_complex u[], int *ldu, | |
f2c_complex vt[], int *ldvt, | |
f2c_complex work[], int *lwork, | |
float rwork[], int iwork[], int *info); | |
extern int | |
FNAME(zgesdd)(char *jobz, int *m, int *n, | |
f2c_doublecomplex a[], int *lda, | |
double s[], f2c_doublecomplex u[], int *ldu, | |
f2c_doublecomplex vt[], int *ldvt, | |
f2c_doublecomplex work[], int *lwork, | |
double rwork[], int iwork[], int *info); | |
extern int | |
FNAME(spotrs)(char *uplo, int *n, int *nrhs, | |
float a[], int *lda, | |
float b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(dpotrs)(char *uplo, int *n, int *nrhs, | |
double a[], int *lda, | |
double b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(cpotrs)(char *uplo, int *n, int *nrhs, | |
f2c_complex a[], int *lda, | |
f2c_complex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(zpotrs)(char *uplo, int *n, int *nrhs, | |
f2c_doublecomplex a[], int *lda, | |
f2c_doublecomplex b[], int *ldb, | |
int *info); | |
extern int | |
FNAME(spotri)(char *uplo, int *n, | |
float a[], int *lda, | |
int *info); | |
extern int | |
FNAME(dpotri)(char *uplo, int *n, | |
double a[], int *lda, | |
int *info); | |
extern int | |
FNAME(cpotri)(char *uplo, int *n, | |
f2c_complex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(zpotri)(char *uplo, int *n, | |
f2c_doublecomplex a[], int *lda, | |
int *info); | |
extern int | |
FNAME(scopy)(int *n, | |
float *sx, int *incx, | |
float *sy, int *incy); | |
extern int | |
FNAME(dcopy)(int *n, | |
double *sx, int *incx, | |
double *sy, int *incy); | |
extern int | |
FNAME(ccopy)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern int | |
FNAME(zcopy)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern float | |
FNAME(sdot)(int *n, | |
float *sx, int *incx, | |
float *sy, int *incy); | |
extern double | |
FNAME(ddot)(int *n, | |
double *sx, int *incx, | |
double *sy, int *incy); | |
extern f2c_complex | |
FNAME(cdotu)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern f2c_doublecomplex | |
FNAME(zdotu)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern f2c_complex | |
FNAME(cdotc)(int *n, | |
f2c_complex *sx, int *incx, | |
f2c_complex *sy, int *incy); | |
extern f2c_doublecomplex | |
FNAME(zdotc)(int *n, | |
f2c_doublecomplex *sx, int *incx, | |
f2c_doublecomplex *sy, int *incy); | |
extern int | |
FNAME(sgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
float *alpha, | |
float *a, int *lda, | |
float *b, int *ldb, | |
float *beta, | |
float *c, int *ldc); | |
extern int | |
FNAME(dgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
double *alpha, | |
double *a, int *lda, | |
double *b, int *ldb, | |
double *beta, | |
double *c, int *ldc); | |
extern int | |
FNAME(cgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
f2c_complex *alpha, | |
f2c_complex *a, int *lda, | |
f2c_complex *b, int *ldb, | |
f2c_complex *beta, | |
f2c_complex *c, int *ldc); | |
extern int | |
FNAME(zgemm)(char *transa, char *transb, | |
int *m, int *n, int *k, | |
f2c_doublecomplex *alpha, | |
f2c_doublecomplex *a, int *lda, | |
f2c_doublecomplex *b, int *ldb, | |
f2c_doublecomplex *beta, | |
f2c_doublecomplex *c, int *ldc); | |
typedef int fortran_int; | |
typedef float fortran_real; | |
typedef double fortran_doublereal; | |
typedef f2c_complex fortran_complex; | |
typedef f2c_doublecomplex fortran_doublecomplex; | |
/* | |
***************************************************************************** | |
** Some handy functions ** | |
***************************************************************************** | |
*/ | |
static inline void * | |
offset_ptr(void* ptr, ptrdiff_t offset) | |
{ | |
return (void*)((npy_uint8*)ptr + offset); | |
} | |
static inline int | |
get_fp_invalid_and_clear(void) | |
{ | |
int status; | |
status = npy_clear_floatstatus(); | |
return !!(status & NPY_FPE_INVALID); | |
} | |
/*
 * Raise the floating point "invalid" status when `error_occurred` is
 * non-zero; otherwise clear the floating point status.
 */
static inline void
set_fp_invalid_or_clear(int error_occurred)
{
    if (!error_occurred) {
        npy_clear_floatstatus();
        return;
    }
    npy_set_floatstatus_invalid();
}
/* | |
***************************************************************************** | |
** Some handy constants ** | |
***************************************************************************** | |
*/ | |
typedef union { | |
fortran_complex f; | |
npy_cfloat npy; | |
float array[2]; | |
} COMPLEX_t; | |
typedef union { | |
fortran_doublecomplex f; | |
npy_cdouble npy; | |
double array[2]; | |
} DOUBLECOMPLEX_t; | |
static float s_one; | |
static float s_zero; | |
static float s_minus_one; | |
static float s_ninf; | |
static float s_nan; | |
static double d_one; | |
static double d_zero; | |
static double d_minus_one; | |
static double d_ninf; | |
static double d_nan; | |
static COMPLEX_t c_one; | |
static COMPLEX_t c_zero; | |
static COMPLEX_t c_minus_one; | |
static COMPLEX_t c_ninf; | |
static COMPLEX_t c_nan; | |
static DOUBLECOMPLEX_t z_one; | |
static DOUBLECOMPLEX_t z_zero; | |
static DOUBLECOMPLEX_t z_minus_one; | |
static DOUBLECOMPLEX_t z_ninf; | |
static DOUBLECOMPLEX_t z_nan; | |
static void init_constants(void) | |
{ | |
/* | |
this is needed as NPY_INFINITY and NPY_NAN macros | |
can't be used as initializers. I prefer to just set | |
all the constants the same way. | |
*/ | |
s_one = 1.0f; | |
s_zero = 0.0f; | |
s_minus_one = -1.0f; | |
s_ninf = -NPY_INFINITYF; | |
s_nan = NPY_NANF; | |
d_one = 1.0; | |
d_zero = 0.0; | |
d_minus_one = -1.0; | |
d_ninf = -NPY_INFINITY; | |
d_nan = NPY_NAN; | |
c_one.array[0] = 1.0f; | |
c_one.array[1] = 0.0f; | |
c_zero.array[0] = 0.0f; | |
c_zero.array[1] = 0.0f; | |
c_minus_one.array[0] = -1.0f; | |
c_minus_one.array[1] = 0.0f; | |
c_ninf.array[0] = -NPY_INFINITYF; | |
c_ninf.array[1] = 0.0f; | |
c_nan.array[0] = NPY_NANF; | |
c_nan.array[1] = NPY_NANF; | |
z_one.array[0] = 1.0; | |
z_one.array[1] = 0.0; | |
z_zero.array[0] = 0.0; | |
z_zero.array[1] = 0.0; | |
z_minus_one.array[0] = -1.0; | |
z_minus_one.array[1] = 0.0; | |
z_ninf.array[0] = -NPY_INFINITY; | |
z_ninf.array[1] = 0.0; | |
z_nan.array[0] = NPY_NAN; | |
z_nan.array[1] = NPY_NAN; | |
} | |
/* | |
***************************************************************************** | |
** Structs used for data rearrangement ** | |
***************************************************************************** | |
*/ | |
/* This struct describes the layout of a strided matrix so that it can be
   linearized into a local buffer usable by BLAS functions, and copied back
   afterwards.
   Strides are stored as byte offsets: the copy helpers divide them by the
   element size before handing element counts to BLAS.
   rows: number of rows of the matrix
   columns: number of columns of the matrix
   row_strides: stride needed to access the next row in the strided matrix
   column_strides: stride needed to access the next column in the strided
                   matrix
   The linearized buffer itself is contiguous: each row of `columns`
   elements is stored immediately after the previous one.
*/
/* Shape and strides of a 2D matrix that is copied to/from a local buffer. */
typedef struct linearize_data_struct
{
    size_t rows;                /* number of rows */
    size_t columns;             /* number of columns */
    ptrdiff_t row_strides;      /* step from one row to the next */
    ptrdiff_t column_strides;   /* step from one column to the next */
} LINEARIZE_DATA_t;
static inline void | |
init_linearize_data(LINEARIZE_DATA_t *lin_data, | |
int rows, | |
int columns, | |
ptrdiff_t row_strides, | |
ptrdiff_t column_strides) | |
{ | |
lin_data->rows = rows; | |
lin_data->columns = columns; | |
lin_data->row_strides = row_strides; | |
lin_data->column_strides = column_strides; | |
} | |
static inline void | |
dump_ufunc_object(PyUFuncObject* ufunc) | |
{ | |
TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", | |
ufunc->core_enabled? "generalized ufunc" : "scalar ufunc", | |
ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes); | |
if (ufunc->core_enabled) { | |
int arg; | |
int dim; | |
TRACE_TXT("\t%s (%d dimension(s) detected).\n", | |
ufunc->core_signature, ufunc->core_num_dim_ix); | |
for (arg = 0; arg < ufunc->nargs; arg++){ | |
int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg]; | |
TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (", | |
arg, arg < ufunc->nin? "INPUT" : "OUTPUT", | |
ufunc->core_num_dims[arg]); | |
for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) { | |
TRACE_TXT(" %d", arg_dim_ix[dim]); | |
} | |
TRACE_TXT(" )\n"); | |
} | |
} | |
} | |
static inline void | |
dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) | |
{ | |
TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ | |
"\n\t\trow_strides: %td column_strides: %td"\ | |
"\n", name, params->rows, params->columns, | |
params->row_strides, params->column_strides); | |
} | |
/* Sum of two single-precision values. */
static inline float
FLOAT_add(float op1, float op2)
{
    const float sum = op1 + op2;
    return sum;
}
/* Sum of two double-precision values. */
static inline double
DOUBLE_add(double op1, double op2)
{
    const double sum = op1 + op2;
    return sum;
}
static inline COMPLEX_t | |
CFLOAT_add(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0] + op2.array[0]; | |
result.array[1] = op1.array[1] + op2.array[1]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_add(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0] + op2.array[0]; | |
result.array[1] = op1.array[1] + op2.array[1]; | |
return result; | |
} | |
/* Product of two single-precision values. */
static inline float
FLOAT_mul(float op1, float op2)
{
    const float product = op1*op2;
    return product;
}
/* Product of two double-precision values. */
static inline double
DOUBLE_mul(double op1, double op2)
{
    const double product = op1*op2;
    return product;
}
static inline COMPLEX_t | |
CFLOAT_mul(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_mul(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1]; | |
return result; | |
} | |
/*
 * Real-valued counterpart of the complex "mulc" helpers; for real operands
 * it is a plain product.
 */
static inline float
FLOAT_mulc(float op1, float op2)
{
    const float product = op1*op2;
    return product;
}
/*
 * Real-valued counterpart of the complex "mulc" helpers; for real operands
 * it is a plain product.
 *
 * The operands are taken as double: declaring them float would silently
 * round every argument to single precision (and overflow large values)
 * before the multiplication.
 */
static inline double
DOUBLE_mulc(double op1, double op2)
{
    return op1*op2;
}
static inline COMPLEX_t | |
CFLOAT_mulc(COMPLEX_t op1, COMPLEX_t op2) | |
{ | |
COMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
return result; | |
} | |
static inline DOUBLECOMPLEX_t | |
CDOUBLE_mulc(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2) | |
{ | |
DOUBLECOMPLEX_t result; | |
result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1]; | |
result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0]; | |
return result; | |
} | |
static inline void | |
print_FLOAT(npy_float s) | |
{ | |
TRACE_TXT(" %8.4f", s); | |
} | |
static inline void | |
print_DOUBLE(npy_double d) | |
{ | |
TRACE_TXT(" %10.6f", d); | |
} | |
static inline void | |
print_CFLOAT(npy_cfloat c) | |
{ | |
float* c_parts = (float*)&c; | |
TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); | |
} | |
static inline void | |
print_CDOUBLE(npy_cdouble z) | |
{ | |
double* z_parts = (double*)&z; | |
TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); | |
} | |
static inline void | |
dump_FLOAT_matrix(const char* name, | |
size_t rows, size_t columns, | |
const npy_float* ptr) | |
{ | |
size_t i,j; | |
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
for (i=0; i<rows; i++) | |
{ | |
TRACE_TXT("| "); | |
for (j=0; j<columns; j++) | |
{ | |
print_FLOAT(ptr[j*rows + i]); | |
TRACE_TXT(", "); | |
} | |
TRACE_TXT(" |\n"); | |
} | |
} | |
static inline void | |
dump_DOUBLE_matrix(const char* name, | |
size_t rows, size_t columns, | |
const npy_double* ptr) | |
{ | |
size_t i,j; | |
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
for (i=0; i<rows; i++) | |
{ | |
TRACE_TXT("| "); | |
for (j=0; j<columns; j++) | |
{ | |
print_DOUBLE(ptr[j*rows + i]); | |
TRACE_TXT(", "); | |
} | |
TRACE_TXT(" |\n"); | |
} | |
} | |
static inline void | |
dump_CFLOAT_matrix(const char* name, | |
size_t rows, size_t columns, | |
const npy_cfloat* ptr) | |
{ | |
size_t i,j; | |
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
for (i=0; i<rows; i++) | |
{ | |
TRACE_TXT("| "); | |
for (j=0; j<columns; j++) | |
{ | |
print_CFLOAT(ptr[j*rows + i]); | |
TRACE_TXT(", "); | |
} | |
TRACE_TXT(" |\n"); | |
} | |
} | |
static inline void | |
dump_CDOUBLE_matrix(const char* name, | |
size_t rows, size_t columns, | |
const npy_cdouble* ptr) | |
{ | |
size_t i,j; | |
TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns); | |
for (i=0; i<rows; i++) | |
{ | |
TRACE_TXT("| "); | |
for (j=0; j<columns; j++) | |
{ | |
print_CDOUBLE(ptr[j*rows + i]); | |
TRACE_TXT(", "); | |
} | |
TRACE_TXT(" |\n"); | |
} | |
} | |
/* | |
***************************************************************************** | |
** Basics ** | |
***************************************************************************** | |
*/ | |
static inline void | |
update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) | |
{ | |
size_t i; | |
for (i=0; i < count; ++i) { | |
bases[i] += offsets[i]; | |
} | |
} | |
/* Disable -Wmaybe-uninitialized, as some of the code below generates false
   positives with this warning.
*/
/* | |
***************************************************************************** | |
** HELPER FUNCS ** | |
***************************************************************************** | |
*/ | |
/* rearranging of 2D matrices using blas */ | |
static inline void * | |
linearize_FLOAT_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
float *src = (float *) src_in; | |
float *dst = (float *) dst_in; | |
if (dst) { | |
int i, j; | |
float* rv = dst; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(float)); | |
fortran_int one = 1; | |
for (i=0; i< data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(scopy)(&columns, | |
(void*)src, &column_strides, | |
(void*)dst, &one); | |
} | |
else if (column_strides < 0) { | |
FNAME(scopy)(&columns, | |
(void*)((float*)src + (columns-1)*column_strides), | |
&column_strides, | |
(void*)dst, &one); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
for (j = 0; j < columns; ++j) { | |
memcpy((float*)dst + j, (float*)src, sizeof(float)); | |
} | |
} | |
src += data->row_strides/sizeof(float); | |
dst += data->columns; | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void * | |
delinearize_FLOAT_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
float *src = (float *) src_in; | |
float *dst = (float *) dst_in; | |
if (src) { | |
int i; | |
float *rv = src; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(float)); | |
fortran_int one = 1; | |
for (i=0; i < data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(scopy)(&columns, | |
(void*)src, &one, | |
(void*)dst, &column_strides); | |
} | |
else if (column_strides < 0) { | |
FNAME(scopy)(&columns, | |
(void*)src, &one, | |
(void*)((float*)dst + (columns-1)*column_strides), | |
&column_strides); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
if (columns > 0) { | |
memcpy((float*)dst, (float*)src + (columns-1), sizeof(float)); | |
} | |
} | |
src += data->columns; | |
dst += data->row_strides/sizeof(float); | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void | |
nan_FLOAT_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
{ | |
float *dst = (float *) dst_in; | |
int i,j; | |
for (i=0; i < data->rows; i++) { | |
float *cp = dst; | |
ptrdiff_t cs = data->column_strides/sizeof(float); | |
for (j=0; j< data->columns; ++j) { | |
*cp = s_nan; | |
cp += cs; | |
} | |
dst += data->row_strides/sizeof(float); | |
} | |
} | |
static inline void * | |
linearize_DOUBLE_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
double *src = (double *) src_in; | |
double *dst = (double *) dst_in; | |
if (dst) { | |
int i, j; | |
double* rv = dst; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(double)); | |
fortran_int one = 1; | |
for (i=0; i< data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(dcopy)(&columns, | |
(void*)src, &column_strides, | |
(void*)dst, &one); | |
} | |
else if (column_strides < 0) { | |
FNAME(dcopy)(&columns, | |
(void*)((double*)src + (columns-1)*column_strides), | |
&column_strides, | |
(void*)dst, &one); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
for (j = 0; j < columns; ++j) { | |
memcpy((double*)dst + j, (double*)src, sizeof(double)); | |
} | |
} | |
src += data->row_strides/sizeof(double); | |
dst += data->columns; | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void * | |
delinearize_DOUBLE_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
double *src = (double *) src_in; | |
double *dst = (double *) dst_in; | |
if (src) { | |
int i; | |
double *rv = src; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(double)); | |
fortran_int one = 1; | |
for (i=0; i < data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(dcopy)(&columns, | |
(void*)src, &one, | |
(void*)dst, &column_strides); | |
} | |
else if (column_strides < 0) { | |
FNAME(dcopy)(&columns, | |
(void*)src, &one, | |
(void*)((double*)dst + (columns-1)*column_strides), | |
&column_strides); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
if (columns > 0) { | |
memcpy((double*)dst, (double*)src + (columns-1), sizeof(double)); | |
} | |
} | |
src += data->columns; | |
dst += data->row_strides/sizeof(double); | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void | |
nan_DOUBLE_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
{ | |
double *dst = (double *) dst_in; | |
int i,j; | |
for (i=0; i < data->rows; i++) { | |
double *cp = dst; | |
ptrdiff_t cs = data->column_strides/sizeof(double); | |
for (j=0; j< data->columns; ++j) { | |
*cp = d_nan; | |
cp += cs; | |
} | |
dst += data->row_strides/sizeof(double); | |
} | |
} | |
static inline void * | |
linearize_CFLOAT_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
COMPLEX_t *src = (COMPLEX_t *) src_in; | |
COMPLEX_t *dst = (COMPLEX_t *) dst_in; | |
if (dst) { | |
int i, j; | |
COMPLEX_t* rv = dst; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(COMPLEX_t)); | |
fortran_int one = 1; | |
for (i=0; i< data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(ccopy)(&columns, | |
(void*)src, &column_strides, | |
(void*)dst, &one); | |
} | |
else if (column_strides < 0) { | |
FNAME(ccopy)(&columns, | |
(void*)((COMPLEX_t*)src + (columns-1)*column_strides), | |
&column_strides, | |
(void*)dst, &one); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
for (j = 0; j < columns; ++j) { | |
memcpy((COMPLEX_t*)dst + j, (COMPLEX_t*)src, sizeof(COMPLEX_t)); | |
} | |
} | |
src += data->row_strides/sizeof(COMPLEX_t); | |
dst += data->columns; | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void * | |
delinearize_CFLOAT_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
COMPLEX_t *src = (COMPLEX_t *) src_in; | |
COMPLEX_t *dst = (COMPLEX_t *) dst_in; | |
if (src) { | |
int i; | |
COMPLEX_t *rv = src; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(COMPLEX_t)); | |
fortran_int one = 1; | |
for (i=0; i < data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(ccopy)(&columns, | |
(void*)src, &one, | |
(void*)dst, &column_strides); | |
} | |
else if (column_strides < 0) { | |
FNAME(ccopy)(&columns, | |
(void*)src, &one, | |
(void*)((COMPLEX_t*)dst + (columns-1)*column_strides), | |
&column_strides); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
if (columns > 0) { | |
memcpy((COMPLEX_t*)dst, (COMPLEX_t*)src + (columns-1), sizeof(COMPLEX_t)); | |
} | |
} | |
src += data->columns; | |
dst += data->row_strides/sizeof(COMPLEX_t); | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void | |
nan_CFLOAT_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
{ | |
COMPLEX_t *dst = (COMPLEX_t *) dst_in; | |
int i,j; | |
for (i=0; i < data->rows; i++) { | |
COMPLEX_t *cp = dst; | |
ptrdiff_t cs = data->column_strides/sizeof(COMPLEX_t); | |
for (j=0; j< data->columns; ++j) { | |
*cp = c_nan; | |
cp += cs; | |
} | |
dst += data->row_strides/sizeof(COMPLEX_t); | |
} | |
} | |
static inline void * | |
linearize_CDOUBLE_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
DOUBLECOMPLEX_t *src = (DOUBLECOMPLEX_t *) src_in; | |
DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; | |
if (dst) { | |
int i, j; | |
DOUBLECOMPLEX_t* rv = dst; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(DOUBLECOMPLEX_t)); | |
fortran_int one = 1; | |
for (i=0; i< data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(zcopy)(&columns, | |
(void*)src, &column_strides, | |
(void*)dst, &one); | |
} | |
else if (column_strides < 0) { | |
FNAME(zcopy)(&columns, | |
(void*)((DOUBLECOMPLEX_t*)src + (columns-1)*column_strides), | |
&column_strides, | |
(void*)dst, &one); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
for (j = 0; j < columns; ++j) { | |
memcpy((DOUBLECOMPLEX_t*)dst + j, (DOUBLECOMPLEX_t*)src, sizeof(DOUBLECOMPLEX_t)); | |
} | |
} | |
src += data->row_strides/sizeof(DOUBLECOMPLEX_t); | |
dst += data->columns; | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void * | |
delinearize_CDOUBLE_matrix(void *dst_in, | |
void *src_in, | |
const LINEARIZE_DATA_t* data) | |
{ | |
DOUBLECOMPLEX_t *src = (DOUBLECOMPLEX_t *) src_in; | |
DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; | |
if (src) { | |
int i; | |
DOUBLECOMPLEX_t *rv = src; | |
fortran_int columns = (fortran_int)data->columns; | |
fortran_int column_strides = | |
(fortran_int)(data->column_strides/sizeof(DOUBLECOMPLEX_t)); | |
fortran_int one = 1; | |
for (i=0; i < data->rows; i++) { | |
if (column_strides > 0) { | |
FNAME(zcopy)(&columns, | |
(void*)src, &one, | |
(void*)dst, &column_strides); | |
} | |
else if (column_strides < 0) { | |
FNAME(zcopy)(&columns, | |
(void*)src, &one, | |
(void*)((DOUBLECOMPLEX_t*)dst + (columns-1)*column_strides), | |
&column_strides); | |
} | |
else { | |
/* | |
* Zero stride has undefined behavior in some BLAS | |
* implementations (e.g. OSX Accelerate), so do it | |
* manually | |
*/ | |
if (columns > 0) { | |
memcpy((DOUBLECOMPLEX_t*)dst, (DOUBLECOMPLEX_t*)src + (columns-1), sizeof(DOUBLECOMPLEX_t)); | |
} | |
} | |
src += data->columns; | |
dst += data->row_strides/sizeof(DOUBLECOMPLEX_t); | |
} | |
return rv; | |
} else { | |
return src; | |
} | |
} | |
static inline void | |
nan_CDOUBLE_matrix(void *dst_in, const LINEARIZE_DATA_t* data) | |
{ | |
DOUBLECOMPLEX_t *dst = (DOUBLECOMPLEX_t *) dst_in; | |
int i,j; | |
for (i=0; i < data->rows; i++) { | |
DOUBLECOMPLEX_t *cp = dst; | |
ptrdiff_t cs = data->column_strides/sizeof(DOUBLECOMPLEX_t); | |
for (j=0; j< data->columns; ++j) { | |
*cp = z_nan; | |
cp += cs; | |
} | |
dst += data->row_strides/sizeof(DOUBLECOMPLEX_t); | |
} | |
} | |
/* identity square matrix generation */ | |
static inline void | |
identity_FLOAT_matrix(void *ptr, size_t n) | |
{ | |
size_t i; | |
float *matrix = (float*) ptr; | |
/* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
memset(matrix, 0, n*n*sizeof(float)); | |
for (i = 0; i < n; ++i) | |
{ | |
*matrix = s_one; | |
matrix += n+1; | |
} | |
} | |
static inline void | |
identity_DOUBLE_matrix(void *ptr, size_t n) | |
{ | |
size_t i; | |
double *matrix = (double*) ptr; | |
/* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
memset(matrix, 0, n*n*sizeof(double)); | |
for (i = 0; i < n; ++i) | |
{ | |
*matrix = d_one; | |
matrix += n+1; | |
} | |
} | |
static inline void | |
identity_CFLOAT_matrix(void *ptr, size_t n) | |
{ | |
size_t i; | |
COMPLEX_t *matrix = (COMPLEX_t*) ptr; | |
/* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
memset(matrix, 0, n*n*sizeof(COMPLEX_t)); | |
for (i = 0; i < n; ++i) | |
{ | |
*matrix = c_one; | |
matrix += n+1; | |
} | |
} | |
static inline void | |
identity_CDOUBLE_matrix(void *ptr, size_t n) | |
{ | |
size_t i; | |
DOUBLECOMPLEX_t *matrix = (DOUBLECOMPLEX_t*) ptr; | |
/* in IEEE floating point, zeroes are represented as bitwise 0 */ | |
memset(matrix, 0, n*n*sizeof(DOUBLECOMPLEX_t)); | |
for (i = 0; i < n; ++i) | |
{ | |
*matrix = z_one; | |
matrix += n+1; | |
} | |
} | |
/* lower/upper triangular matrix using blas (in place) */ | |
static inline void | |
triu_FLOAT_matrix(void *ptr, size_t n) | |
{ | |
size_t i,j; | |
float *matrix = (float*)ptr; | |
matrix += n; | |
for (i=1; i < n; ++i) { | |
for (j=0; j<i; ++j) { | |
matrix[j] = s_zero; | |
} | |
matrix += n; | |
} | |
} | |
static inline void | |
triu_DOUBLE_matrix(void *ptr, size_t n) | |
{ | |
size_t i,j; | |
double *matrix = (double*)ptr; | |
matrix += n; | |
for (i=1; i < n; ++i) { | |
for (j=0; j<i; ++j) { | |
matrix[j] = d_zero; | |
} | |
matrix += n; | |
} | |
} | |
static inline void | |
triu_CFLOAT_matrix(void *ptr, size_t n) | |
{ | |
size_t i,j; | |
COMPLEX_t *matrix = (COMPLEX_t*)ptr; | |
matrix += n; | |
for (i=1; i < n; ++i) { | |
for (j=0; j<i; ++j) { | |
matrix[j] = c_zero; | |
} | |
matrix += n; | |
} | |
} | |
static inline void | |
triu_CDOUBLE_matrix(void *ptr, size_t n) | |
{ | |
size_t i,j; | |
DOUBLECOMPLEX_t *matrix = (DOUBLECOMPLEX_t*)ptr; | |
matrix += n; | |
for (i=1; i < n; ++i) { | |
for (j=0; j<i; ++j) { | |
matrix[j] = z_zero; | |
} | |
matrix += n; | |
} | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* Determinants */ | |
static inline void | |
FLOAT_slogdet_from_factored_diagonal(npy_float* src, | |
fortran_int m, | |
npy_float *sign, | |
npy_float *logdet) | |
{ | |
npy_float acc_sign = *sign; | |
npy_float acc_logdet = 0.0f; | |
int i; | |
for (i = 0; i < m; i++) { | |
npy_float abs_element = *src; | |
if (abs_element < 0.0f) { | |
acc_sign = -acc_sign; | |
abs_element = -abs_element; | |
} | |
acc_logdet += npy_logf(abs_element); | |
src += m+1; | |
} | |
*sign = acc_sign; | |
*logdet = acc_logdet; | |
} | |
static inline npy_float | |
FLOAT_det_from_slogdet(npy_float sign, npy_float logdet) | |
{ | |
npy_float result = sign * npy_expf(logdet); | |
return result; | |
} | |
static inline void | |
DOUBLE_slogdet_from_factored_diagonal(npy_double* src, | |
fortran_int m, | |
npy_double *sign, | |
npy_double *logdet) | |
{ | |
npy_double acc_sign = *sign; | |
npy_double acc_logdet = 0.0; | |
int i; | |
for (i = 0; i < m; i++) { | |
npy_double abs_element = *src; | |
if (abs_element < 0.0) { | |
acc_sign = -acc_sign; | |
abs_element = -abs_element; | |
} | |
acc_logdet += npy_log(abs_element); | |
src += m+1; | |
} | |
*sign = acc_sign; | |
*logdet = acc_logdet; | |
} | |
static inline npy_double | |
DOUBLE_det_from_slogdet(npy_double sign, npy_double logdet) | |
{ | |
npy_double result = sign * npy_exp(logdet); | |
return result; | |
} | |
static inline npy_cfloat | |
CFLOAT_mult(npy_cfloat op1, npy_cfloat op2) | |
{ | |
npy_cfloat rv; | |
RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); | |
IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); | |
return rv; | |
} | |
static inline void | |
CFLOAT_slogdet_from_factored_diagonal(npy_cfloat* src, | |
fortran_int m, | |
npy_cfloat *sign, | |
npy_float *logdet) | |
{ | |
int i; | |
npy_cfloat sign_acc = *sign; | |
npy_float logdet_acc = 0.0f; | |
for (i = 0; i < m; i++) | |
{ | |
npy_float abs_element = npy_cabsf(*src); | |
npy_cfloat sign_element; | |
RE(sign_element) = RE(*src) / abs_element; | |
IM(sign_element) = IM(*src) / abs_element; | |
sign_acc = CFLOAT_mult(sign_acc, sign_element); | |
logdet_acc += npy_logf(abs_element); | |
src += m + 1; | |
} | |
*sign = sign_acc; | |
*logdet = logdet_acc; | |
} | |
static inline npy_cfloat | |
CFLOAT_det_from_slogdet(npy_cfloat sign, npy_float logdet) | |
{ | |
npy_cfloat tmp; | |
RE(tmp) = npy_expf(logdet); | |
IM(tmp) = 0.0f; | |
return CFLOAT_mult(sign, tmp); | |
} | |
static inline npy_cdouble | |
CDOUBLE_mult(npy_cdouble op1, npy_cdouble op2) | |
{ | |
npy_cdouble rv; | |
RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); | |
IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); | |
return rv; | |
} | |
static inline void | |
CDOUBLE_slogdet_from_factored_diagonal(npy_cdouble* src, | |
fortran_int m, | |
npy_cdouble *sign, | |
npy_double *logdet) | |
{ | |
int i; | |
npy_cdouble sign_acc = *sign; | |
npy_double logdet_acc = 0.0; | |
for (i = 0; i < m; i++) | |
{ | |
npy_double abs_element = npy_cabs(*src); | |
npy_cdouble sign_element; | |
RE(sign_element) = RE(*src) / abs_element; | |
IM(sign_element) = IM(*src) / abs_element; | |
sign_acc = CDOUBLE_mult(sign_acc, sign_element); | |
logdet_acc += npy_log(abs_element); | |
src += m + 1; | |
} | |
*sign = sign_acc; | |
*logdet = logdet_acc; | |
} | |
static inline npy_cdouble | |
CDOUBLE_det_from_slogdet(npy_cdouble sign, npy_double logdet) | |
{ | |
npy_cdouble tmp; | |
RE(tmp) = npy_exp(logdet); | |
IM(tmp) = 0.0; | |
return CDOUBLE_mult(sign, tmp); | |
} | |
/* As in the linalg package, the determinant is computed via LU factorization
 * using LAPACK.
 * slogdet computes the sign and the logarithm of the absolute value of the
 * determinant (two separate outputs).
 * det computes sign * exp(logdet) from those two values.
 */
static inline void | |
FLOAT_slogdet_single_element(fortran_int m, | |
void* src, | |
fortran_int* pivots, | |
npy_float *sign, | |
npy_float *logdet) | |
{ | |
fortran_int info = 0; | |
int i; | |
/* note: done in place */ | |
LAPACK(sgetrf)(&m, &m, (void *)src, &m, pivots, &info); | |
if (info == 0) | |
{ | |
int change_sign = 0; | |
/* note: fortran uses 1 based indexing */ | |
for (i=0; i < m; i++) | |
{ | |
change_sign += (pivots[i] != (i+1)); | |
} | |
memcpy(sign, | |
(change_sign % 2)? | |
&s_minus_one : | |
&s_one | |
, sizeof(*sign)); | |
FLOAT_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
} else { | |
/* | |
if getrf fails, use 0 as sign and -inf as logdet | |
*/ | |
memcpy(sign, &s_zero, sizeof(*sign)); | |
memcpy(logdet, &s_ninf, sizeof(*logdet)); | |
} | |
} | |
static void | |
FLOAT_slogdet(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff = NULL; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_3 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_float); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_3 | |
linearize_FLOAT_matrix(tmp_buff, args[0], &lin_data); | |
FLOAT_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
(npy_float*)args[1], | |
(npy_float*)args[2]); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static void | |
FLOAT_det(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_2 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_float); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
npy_float sign; | |
npy_float logdet; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_2 | |
linearize_FLOAT_matrix(tmp_buff, args[0], &lin_data); | |
FLOAT_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
&sign, | |
&logdet); | |
*(npy_float *)args[1] = FLOAT_det_from_slogdet(sign, logdet); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static inline void | |
DOUBLE_slogdet_single_element(fortran_int m, | |
void* src, | |
fortran_int* pivots, | |
npy_double *sign, | |
npy_double *logdet) | |
{ | |
fortran_int info = 0; | |
int i; | |
/* note: done in place */ | |
LAPACK(dgetrf)(&m, &m, (void *)src, &m, pivots, &info); | |
if (info == 0) | |
{ | |
int change_sign = 0; | |
/* note: fortran uses 1 based indexing */ | |
for (i=0; i < m; i++) | |
{ | |
change_sign += (pivots[i] != (i+1)); | |
} | |
memcpy(sign, | |
(change_sign % 2)? | |
&d_minus_one : | |
&d_one | |
, sizeof(*sign)); | |
DOUBLE_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
} else { | |
/* | |
if getrf fails, use 0 as sign and -inf as logdet | |
*/ | |
memcpy(sign, &d_zero, sizeof(*sign)); | |
memcpy(logdet, &d_ninf, sizeof(*logdet)); | |
} | |
} | |
static void | |
DOUBLE_slogdet(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff = NULL; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_3 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_double); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_3 | |
linearize_DOUBLE_matrix(tmp_buff, args[0], &lin_data); | |
DOUBLE_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
(npy_double*)args[1], | |
(npy_double*)args[2]); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static void | |
DOUBLE_det(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_2 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_double); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
npy_double sign; | |
npy_double logdet; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_2 | |
linearize_DOUBLE_matrix(tmp_buff, args[0], &lin_data); | |
DOUBLE_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
&sign, | |
&logdet); | |
*(npy_double *)args[1] = DOUBLE_det_from_slogdet(sign, logdet); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static inline void | |
CFLOAT_slogdet_single_element(fortran_int m, | |
void* src, | |
fortran_int* pivots, | |
npy_cfloat *sign, | |
npy_float *logdet) | |
{ | |
fortran_int info = 0; | |
int i; | |
/* note: done in place */ | |
LAPACK(cgetrf)(&m, &m, (void *)src, &m, pivots, &info); | |
if (info == 0) | |
{ | |
int change_sign = 0; | |
/* note: fortran uses 1 based indexing */ | |
for (i=0; i < m; i++) | |
{ | |
change_sign += (pivots[i] != (i+1)); | |
} | |
memcpy(sign, | |
(change_sign % 2)? | |
&c_minus_one : | |
&c_one | |
, sizeof(*sign)); | |
CFLOAT_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
} else { | |
/* | |
if getrf fails, use 0 as sign and -inf as logdet | |
*/ | |
memcpy(sign, &c_zero, sizeof(*sign)); | |
memcpy(logdet, &c_ninf, sizeof(*logdet)); | |
} | |
} | |
static void | |
CFLOAT_slogdet(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff = NULL; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_3 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_cfloat); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_3 | |
linearize_CFLOAT_matrix(tmp_buff, args[0], &lin_data); | |
CFLOAT_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
(npy_cfloat*)args[1], | |
(npy_float*)args[2]); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static void | |
CFLOAT_det(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_2 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_cfloat); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
npy_cfloat sign; | |
npy_float logdet; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_2 | |
linearize_CFLOAT_matrix(tmp_buff, args[0], &lin_data); | |
CFLOAT_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
&sign, | |
&logdet); | |
*(npy_cfloat *)args[1] = CFLOAT_det_from_slogdet(sign, logdet); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static inline void | |
CDOUBLE_slogdet_single_element(fortran_int m, | |
void* src, | |
fortran_int* pivots, | |
npy_cdouble *sign, | |
npy_double *logdet) | |
{ | |
fortran_int info = 0; | |
int i; | |
/* note: done in place */ | |
LAPACK(zgetrf)(&m, &m, (void *)src, &m, pivots, &info); | |
if (info == 0) | |
{ | |
int change_sign = 0; | |
/* note: fortran uses 1 based indexing */ | |
for (i=0; i < m; i++) | |
{ | |
change_sign += (pivots[i] != (i+1)); | |
} | |
memcpy(sign, | |
(change_sign % 2)? | |
&z_minus_one : | |
&z_one | |
, sizeof(*sign)); | |
CDOUBLE_slogdet_from_factored_diagonal(src, m, sign, logdet); | |
} else { | |
/* | |
if getrf fails, use 0 as sign and -inf as logdet | |
*/ | |
memcpy(sign, &z_zero, sizeof(*sign)); | |
memcpy(logdet, &z_ninf, sizeof(*logdet)); | |
} | |
} | |
static void | |
CDOUBLE_slogdet(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff = NULL; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_3 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_cdouble); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_3 | |
linearize_CDOUBLE_matrix(tmp_buff, args[0], &lin_data); | |
CDOUBLE_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
(npy_cdouble*)args[1], | |
(npy_double*)args[2]); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
static void | |
CDOUBLE_det(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
fortran_int m; | |
npy_uint8 *tmp_buff; | |
size_t matrix_size; | |
size_t pivot_size; | |
/* notes: | |
* matrix will need to be copied always, as factorization in lapack is | |
* made inplace | |
* matrix will need to be in column-major order, as expected by lapack | |
* code (fortran) | |
* always a square matrix | |
* need to allocate memory for both, matrix_buffer and pivot buffer | |
*/ | |
INIT_OUTER_LOOP_2 | |
m = (fortran_int) dimensions[0]; | |
matrix_size = m*m*sizeof(npy_cdouble); | |
pivot_size = m*sizeof(fortran_int); | |
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); | |
if (tmp_buff) | |
{ | |
LINEARIZE_DATA_t lin_data; | |
npy_cdouble sign; | |
npy_double logdet; | |
/* swapped steps to get matrix in FORTRAN order */ | |
init_linearize_data(&lin_data, m, m, | |
(ptrdiff_t)steps[1], | |
(ptrdiff_t)steps[0]); | |
BEGIN_OUTER_LOOP_2 | |
linearize_CDOUBLE_matrix(tmp_buff, args[0], &lin_data); | |
CDOUBLE_slogdet_single_element(m, | |
(void*)tmp_buff, | |
(fortran_int*)(tmp_buff+matrix_size), | |
&sign, | |
&logdet); | |
*(npy_cdouble *)args[1] = CDOUBLE_det_from_slogdet(sign, logdet); | |
END_OUTER_LOOP | |
free(tmp_buff); | |
} | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* Eigh family */ | |
typedef struct eigh_params_struct { | |
void *A; /* matrix */ | |
void *W; /* eigenvalue vector */ | |
void *WORK; /* main work buffer */ | |
void *RWORK; /* secondary work buffer (for complex versions) */ | |
void *IWORK; | |
fortran_int N; | |
fortran_int LWORK; | |
fortran_int LRWORK; | |
fortran_int LIWORK; | |
char JOBZ; | |
char UPLO; | |
} EIGH_PARAMS_t; | |
/*
 * Initialize the parameters used by the LAPACK function _syevd.
 * Handles buffer allocation.
 */
static inline int | |
init_ssyevd(EIGH_PARAMS_t* params, char JOBZ, char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_float query_work_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int liwork = -1; | |
fortran_int info; | |
npy_uint8 *a, *w, *work, *iwork; | |
size_t alloc_size = N*(N+1)*sizeof(npy_float); | |
mem_buff = malloc(alloc_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff + N*N*sizeof(npy_float); | |
LAPACK(ssyevd)(&JOBZ, &UPLO, &N, | |
(fortran_real*)a, &N, (fortran_real*)w, | |
&query_work_size, &lwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
work = mem_buff; | |
lwork = (fortran_int)query_work_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(npy_float) + liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
iwork = mem_buff2 + lwork*sizeof(npy_float); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = NULL; /* unused */ | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = 0; /* unused */ | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
/* something failed */ | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_ssyevd(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(ssyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/*
 * Initialize the parameters used by the LAPACK function _syevd.
 * Handles buffer allocation.
 */
static inline int | |
init_dsyevd(EIGH_PARAMS_t* params, char JOBZ, char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_double query_work_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int liwork = -1; | |
fortran_int info; | |
npy_uint8 *a, *w, *work, *iwork; | |
size_t alloc_size = N*(N+1)*sizeof(npy_double); | |
mem_buff = malloc(alloc_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff + N*N*sizeof(npy_double); | |
LAPACK(dsyevd)(&JOBZ, &UPLO, &N, | |
(fortran_doublereal*)a, &N, (fortran_doublereal*)w, | |
&query_work_size, &lwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
work = mem_buff; | |
lwork = (fortran_int)query_work_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(npy_double) + liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
iwork = mem_buff2 + lwork*sizeof(npy_double); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = NULL; /* unused */ | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = 0; /* unused */ | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
/* something failed */ | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_dsyevd(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(dsyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/*
 * Initialize the parameters used by the LAPACK function _heevd.
 * Handles buffer allocation.
 */
static inline int | |
init_cheevd(EIGH_PARAMS_t *params, | |
char JOBZ, | |
char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
fortran_complex query_work_size; | |
fortran_real query_rwork_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int lrwork = -1; | |
fortran_int liwork = -1; | |
npy_uint8 *a, *w, *work, *rwork, *iwork; | |
fortran_int info; | |
mem_buff = malloc(N*N*sizeof(npy_cfloat)+N*sizeof(npy_float)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff+N*N*sizeof(npy_cfloat); | |
LAPACK(cheevd)(&JOBZ, &UPLO, &N, | |
(fortran_complex*)a, &N, (fortran_real*)w, | |
&query_work_size, &lwork, | |
&query_rwork_size, &lrwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
lwork = (fortran_int)*(fortran_real*)&query_work_size; | |
lrwork = (fortran_int)query_rwork_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(npy_cfloat) + | |
lrwork*sizeof(npy_float) + | |
liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
rwork = work + lwork*sizeof(npy_cfloat); | |
iwork = rwork + lrwork*sizeof(npy_float); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = lrwork; | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
/* something failed */ | |
error: | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_cheevd(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(cheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, ¶ms->LRWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/*
 * Initialize the parameters used by the LAPACK function _heevd.
 * Handles buffer allocation.
 */
static inline int | |
init_zheevd(EIGH_PARAMS_t *params, | |
char JOBZ, | |
char UPLO, | |
fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
fortran_doublecomplex query_work_size; | |
fortran_doublereal query_rwork_size; | |
fortran_int query_iwork_size; | |
fortran_int lwork = -1; | |
fortran_int lrwork = -1; | |
fortran_int liwork = -1; | |
npy_uint8 *a, *w, *work, *rwork, *iwork; | |
fortran_int info; | |
mem_buff = malloc(N*N*sizeof(npy_cdouble)+N*sizeof(npy_double)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = mem_buff+N*N*sizeof(npy_cdouble); | |
LAPACK(zheevd)(&JOBZ, &UPLO, &N, | |
(fortran_doublecomplex*)a, &N, (fortran_doublereal*)w, | |
&query_work_size, &lwork, | |
&query_rwork_size, &lrwork, | |
&query_iwork_size, &liwork, | |
&info); | |
if (info != 0) | |
goto error; | |
lwork = (fortran_int)*(fortran_doublereal*)&query_work_size; | |
lrwork = (fortran_int)query_rwork_size; | |
liwork = query_iwork_size; | |
mem_buff2 = malloc(lwork*sizeof(npy_cdouble) + | |
lrwork*sizeof(npy_double) + | |
liwork*sizeof(fortran_int)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
rwork = work + lwork*sizeof(npy_cdouble); | |
iwork = rwork + lrwork*sizeof(npy_double); | |
params->A = a; | |
params->W = w; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->N = N; | |
params->LWORK = lwork; | |
params->LRWORK = lrwork; | |
params->LIWORK = liwork; | |
params->JOBZ = JOBZ; | |
params->UPLO = UPLO; | |
return 1; | |
/* something failed */ | |
error: | |
memset(params, 0, sizeof(*params)); | |
free(mem_buff2); | |
free(mem_buff); | |
return 0; | |
} | |
static inline fortran_int | |
call_zheevd(EIGH_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(zheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, | |
params->A, ¶ms->N, params->W, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, ¶ms->LRWORK, | |
params->IWORK, ¶ms->LIWORK, | |
&rv); | |
return rv; | |
} | |
/* | |
* (M,M)->(M,)(M,M) | |
* dimensions[1] -> M | |
* args[0] -> A[in] | |
* args[1] -> W | |
* args[2] -> A[out] | |
*/ | |
static inline void | |
release_ssyevd(EIGH_PARAMS_t *params) | |
{ | |
/* allocated memory in A and WORK */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
FLOAT_eigh_wrapper(char JOBZ, | |
char UPLO, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[3]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:3; | |
EIGH_PARAMS_t eigh_params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_ssyevd(&eigh_params, | |
JOBZ, | |
UPLO, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t matrix_in_ld; | |
LINEARIZE_DATA_t eigenvectors_out_ld; | |
LINEARIZE_DATA_t eigenvalues_out_ld; | |
init_linearize_data(&matrix_in_ld, | |
eigh_params.N, eigh_params.N, | |
steps[1], steps[0]); | |
init_linearize_data(&eigenvalues_out_ld, | |
1, eigh_params.N, | |
0, steps[2]); | |
if ('V' == eigh_params.JOBZ) { | |
init_linearize_data(&eigenvectors_out_ld, | |
eigh_params.N, eigh_params.N, | |
steps[4], steps[3]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_FLOAT_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
not_ok = call_ssyevd(&eigh_params); | |
if (!not_ok) { | |
/* lapack ok, copy result out */ | |
delinearize_FLOAT_matrix(args[1], | |
eigh_params.W, | |
&eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
delinearize_FLOAT_matrix(args[2], | |
eigh_params.A, | |
&eigenvectors_out_ld); | |
} | |
} else { | |
/* lapack fail, set result to nan */ | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[1], &eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
nan_FLOAT_matrix(args[2], &eigenvectors_out_ld); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_ssyevd(&eigh_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* | |
* (M,M)->(M,)(M,M) | |
* dimensions[1] -> M | |
* args[0] -> A[in] | |
* args[1] -> W | |
* args[2] -> A[out] | |
*/ | |
static inline void | |
release_dsyevd(EIGH_PARAMS_t *params) | |
{ | |
/* allocated memory in A and WORK */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
DOUBLE_eigh_wrapper(char JOBZ, | |
char UPLO, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[3]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:3; | |
EIGH_PARAMS_t eigh_params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_dsyevd(&eigh_params, | |
JOBZ, | |
UPLO, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t matrix_in_ld; | |
LINEARIZE_DATA_t eigenvectors_out_ld; | |
LINEARIZE_DATA_t eigenvalues_out_ld; | |
init_linearize_data(&matrix_in_ld, | |
eigh_params.N, eigh_params.N, | |
steps[1], steps[0]); | |
init_linearize_data(&eigenvalues_out_ld, | |
1, eigh_params.N, | |
0, steps[2]); | |
if ('V' == eigh_params.JOBZ) { | |
init_linearize_data(&eigenvectors_out_ld, | |
eigh_params.N, eigh_params.N, | |
steps[4], steps[3]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_DOUBLE_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
not_ok = call_dsyevd(&eigh_params); | |
if (!not_ok) { | |
/* lapack ok, copy result out */ | |
delinearize_DOUBLE_matrix(args[1], | |
eigh_params.W, | |
&eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
delinearize_DOUBLE_matrix(args[2], | |
eigh_params.A, | |
&eigenvectors_out_ld); | |
} | |
} else { | |
/* lapack fail, set result to nan */ | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[1], &eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
nan_DOUBLE_matrix(args[2], &eigenvectors_out_ld); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_dsyevd(&eigh_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* | |
* (M,M)->(M,)(M,M) | |
* dimensions[1] -> M | |
* args[0] -> A[in] | |
* args[1] -> W | |
* args[2] -> A[out] | |
*/ | |
static inline void | |
release_cheevd(EIGH_PARAMS_t *params) | |
{ | |
/* allocated memory in A and WORK */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
CFLOAT_eigh_wrapper(char JOBZ, | |
char UPLO, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[3]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:3; | |
EIGH_PARAMS_t eigh_params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_cheevd(&eigh_params, | |
JOBZ, | |
UPLO, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t matrix_in_ld; | |
LINEARIZE_DATA_t eigenvectors_out_ld; | |
LINEARIZE_DATA_t eigenvalues_out_ld; | |
init_linearize_data(&matrix_in_ld, | |
eigh_params.N, eigh_params.N, | |
steps[1], steps[0]); | |
init_linearize_data(&eigenvalues_out_ld, | |
1, eigh_params.N, | |
0, steps[2]); | |
if ('V' == eigh_params.JOBZ) { | |
init_linearize_data(&eigenvectors_out_ld, | |
eigh_params.N, eigh_params.N, | |
steps[4], steps[3]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_CFLOAT_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
not_ok = call_cheevd(&eigh_params); | |
if (!not_ok) { | |
/* lapack ok, copy result out */ | |
delinearize_FLOAT_matrix(args[1], | |
eigh_params.W, | |
&eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
delinearize_CFLOAT_matrix(args[2], | |
eigh_params.A, | |
&eigenvectors_out_ld); | |
} | |
} else { | |
/* lapack fail, set result to nan */ | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[1], &eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
nan_CFLOAT_matrix(args[2], &eigenvectors_out_ld); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_cheevd(&eigh_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/*
 * (M,M)->(M,)(M,M)
 * dimensions[1] -> M
 * args[0] -> A[in]
 * args[1] -> W
 * args[2] -> A[out]
 */
static inline void | |
release_zheevd(EIGH_PARAMS_t *params) | |
{ | |
/* allocated memory in A and WORK */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
CDOUBLE_eigh_wrapper(char JOBZ, | |
char UPLO, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[3]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:3; | |
EIGH_PARAMS_t eigh_params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_zheevd(&eigh_params, | |
JOBZ, | |
UPLO, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t matrix_in_ld; | |
LINEARIZE_DATA_t eigenvectors_out_ld; | |
LINEARIZE_DATA_t eigenvalues_out_ld; | |
init_linearize_data(&matrix_in_ld, | |
eigh_params.N, eigh_params.N, | |
steps[1], steps[0]); | |
init_linearize_data(&eigenvalues_out_ld, | |
1, eigh_params.N, | |
0, steps[2]); | |
if ('V' == eigh_params.JOBZ) { | |
init_linearize_data(&eigenvectors_out_ld, | |
eigh_params.N, eigh_params.N, | |
steps[4], steps[3]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_CDOUBLE_matrix(eigh_params.A, args[0], &matrix_in_ld); | |
not_ok = call_zheevd(&eigh_params); | |
if (!not_ok) { | |
/* lapack ok, copy result out */ | |
delinearize_DOUBLE_matrix(args[1], | |
eigh_params.W, | |
&eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
delinearize_CDOUBLE_matrix(args[2], | |
eigh_params.A, | |
&eigenvectors_out_ld); | |
} | |
} else { | |
/* lapack fail, set result to nan */ | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[1], &eigenvalues_out_ld); | |
if ('V' == eigh_params.JOBZ) { | |
nan_CDOUBLE_matrix(args[2], &eigenvectors_out_ld); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_zheevd(&eigh_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
FLOAT_eighlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_eigh_wrapper('V', 'L', args, dimensions, steps); | |
} | |
static void | |
FLOAT_eighup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
FLOAT_eigh_wrapper('V', 'U', args, dimensions, steps); | |
} | |
static void | |
FLOAT_eigvalshlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
FLOAT_eigh_wrapper('N', 'L', args, dimensions, steps); | |
} | |
static void | |
FLOAT_eigvalshup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
FLOAT_eigh_wrapper('N', 'U', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_eighlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_eigh_wrapper('V', 'L', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_eighup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
DOUBLE_eigh_wrapper('V', 'U', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_eigvalshlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
DOUBLE_eigh_wrapper('N', 'L', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_eigvalshup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
DOUBLE_eigh_wrapper('N', 'U', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_eighlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CFLOAT_eigh_wrapper('V', 'L', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_eighup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CFLOAT_eigh_wrapper('V', 'U', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_eigvalshlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CFLOAT_eigh_wrapper('N', 'L', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_eigvalshup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CFLOAT_eigh_wrapper('N', 'U', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_eighlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eigh_wrapper('V', 'L', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_eighup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eigh_wrapper('V', 'U', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_eigvalshlo(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eigh_wrapper('N', 'L', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_eigvalshup(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void* NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eigh_wrapper('N', 'U', args, dimensions, steps); | |
} | |
/* -------------------------------------------------------------------------- */
/* Solve family (includes inv) */
typedef struct gesv_params_struct | |
{ | |
void *A; /* A is (N,N) of base type */ | |
void *B; /* B is (N,NRHS) of base type */ | |
fortran_int * IPIV; /* IPIV is (N) */ | |
fortran_int N; | |
fortran_int NRHS; | |
fortran_int LDA; | |
fortran_int LDB; | |
} GESV_PARAMS_t; | |
/* | |
* Initialize the parameters to use in for the lapack function _heev | |
* Handles buffer allocation | |
*/ | |
static inline int | |
init_sgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a, *b, *ipiv; | |
mem_buff = malloc(N*N*sizeof(fortran_real) + | |
N*NRHS*sizeof(fortran_real) + | |
N*sizeof(fortran_int)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
b = a + N*N*sizeof(fortran_real); | |
ipiv = b + N*NRHS*sizeof(fortran_real); | |
params->A = a; | |
params->B = b; | |
params->IPIV = (fortran_int*)ipiv; | |
params->N = N; | |
params->NRHS = NRHS; | |
params->LDA = N; | |
params->LDB = N; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_sgesv(GESV_PARAMS_t *params) | |
{ | |
/* memory block base is in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_sgesv(GESV_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(sgesv)(¶ms->N, ¶ms->NRHS, | |
params->A, ¶ms->LDA, | |
params->IPIV, | |
params->B, ¶ms->LDB, | |
&rv); | |
return rv; | |
} | |
static void | |
FLOAT_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n, nrhs; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
nrhs = (fortran_int)dimensions[1]; | |
if (init_sgesv(¶ms, n, nrhs)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_FLOAT_matrix(params.A, args[0], &a_in); | |
linearize_FLOAT_matrix(params.B, args[1], &b_in); | |
not_ok =call_sgesv(¶ms); | |
if (!not_ok) { | |
delinearize_FLOAT_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_sgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
FLOAT_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
if (init_sgesv(¶ms, n, 1)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_FLOAT_matrix(params.A, args[0], &a_in); | |
linearize_FLOAT_matrix(params.B, args[1], &b_in); | |
not_ok = call_sgesv(¶ms); | |
if (!not_ok) { | |
delinearize_FLOAT_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_sgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
FLOAT_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_2 | |
n = (fortran_int)dimensions[0]; | |
if (init_sgesv(¶ms, n, n)) { | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_FLOAT_matrix(params.A, args[0], &a_in); | |
identity_FLOAT_matrix(params.B, n); | |
not_ok = call_sgesv(¶ms); | |
if (!not_ok) { | |
delinearize_FLOAT_matrix(args[1], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_sgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* | |
* Initialize the parameters to use in for the lapack function _heev | |
* Handles buffer allocation | |
*/ | |
static inline int | |
init_dgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a, *b, *ipiv; | |
mem_buff = malloc(N*N*sizeof(fortran_doublereal) + | |
N*NRHS*sizeof(fortran_doublereal) + | |
N*sizeof(fortran_int)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
b = a + N*N*sizeof(fortran_doublereal); | |
ipiv = b + N*NRHS*sizeof(fortran_doublereal); | |
params->A = a; | |
params->B = b; | |
params->IPIV = (fortran_int*)ipiv; | |
params->N = N; | |
params->NRHS = NRHS; | |
params->LDA = N; | |
params->LDB = N; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_dgesv(GESV_PARAMS_t *params) | |
{ | |
/* memory block base is in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_dgesv(GESV_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(dgesv)(¶ms->N, ¶ms->NRHS, | |
params->A, ¶ms->LDA, | |
params->IPIV, | |
params->B, ¶ms->LDB, | |
&rv); | |
return rv; | |
} | |
static void | |
DOUBLE_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n, nrhs; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
nrhs = (fortran_int)dimensions[1]; | |
if (init_dgesv(¶ms, n, nrhs)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_DOUBLE_matrix(params.A, args[0], &a_in); | |
linearize_DOUBLE_matrix(params.B, args[1], &b_in); | |
not_ok =call_dgesv(¶ms); | |
if (!not_ok) { | |
delinearize_DOUBLE_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_dgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
DOUBLE_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
if (init_dgesv(¶ms, n, 1)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_DOUBLE_matrix(params.A, args[0], &a_in); | |
linearize_DOUBLE_matrix(params.B, args[1], &b_in); | |
not_ok = call_dgesv(¶ms); | |
if (!not_ok) { | |
delinearize_DOUBLE_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_dgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
DOUBLE_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_2 | |
n = (fortran_int)dimensions[0]; | |
if (init_dgesv(¶ms, n, n)) { | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_DOUBLE_matrix(params.A, args[0], &a_in); | |
identity_DOUBLE_matrix(params.B, n); | |
not_ok = call_dgesv(¶ms); | |
if (!not_ok) { | |
delinearize_DOUBLE_matrix(args[1], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_dgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* | |
* Initialize the parameters to use in for the lapack function _heev | |
* Handles buffer allocation | |
*/ | |
static inline int | |
init_cgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a, *b, *ipiv; | |
mem_buff = malloc(N*N*sizeof(fortran_complex) + | |
N*NRHS*sizeof(fortran_complex) + | |
N*sizeof(fortran_int)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
b = a + N*N*sizeof(fortran_complex); | |
ipiv = b + N*NRHS*sizeof(fortran_complex); | |
params->A = a; | |
params->B = b; | |
params->IPIV = (fortran_int*)ipiv; | |
params->N = N; | |
params->NRHS = NRHS; | |
params->LDA = N; | |
params->LDB = N; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_cgesv(GESV_PARAMS_t *params) | |
{ | |
/* memory block base is in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_cgesv(GESV_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(cgesv)(¶ms->N, ¶ms->NRHS, | |
params->A, ¶ms->LDA, | |
params->IPIV, | |
params->B, ¶ms->LDB, | |
&rv); | |
return rv; | |
} | |
static void | |
CFLOAT_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n, nrhs; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
nrhs = (fortran_int)dimensions[1]; | |
if (init_cgesv(¶ms, n, nrhs)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_CFLOAT_matrix(params.A, args[0], &a_in); | |
linearize_CFLOAT_matrix(params.B, args[1], &b_in); | |
not_ok =call_cgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CFLOAT_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CFLOAT_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_cgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CFLOAT_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
if (init_cgesv(¶ms, n, 1)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_CFLOAT_matrix(params.A, args[0], &a_in); | |
linearize_CFLOAT_matrix(params.B, args[1], &b_in); | |
not_ok = call_cgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CFLOAT_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CFLOAT_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_cgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CFLOAT_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_2 | |
n = (fortran_int)dimensions[0]; | |
if (init_cgesv(¶ms, n, n)) { | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_CFLOAT_matrix(params.A, args[0], &a_in); | |
identity_CFLOAT_matrix(params.B, n); | |
not_ok = call_cgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CFLOAT_matrix(args[1], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CFLOAT_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_cgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* | |
* Initialize the parameters to use in for the lapack function _heev | |
* Handles buffer allocation | |
*/ | |
static inline int | |
init_zgesv(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a, *b, *ipiv; | |
mem_buff = malloc(N*N*sizeof(fortran_doublecomplex) + | |
N*NRHS*sizeof(fortran_doublecomplex) + | |
N*sizeof(fortran_int)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
b = a + N*N*sizeof(fortran_doublecomplex); | |
ipiv = b + N*NRHS*sizeof(fortran_doublecomplex); | |
params->A = a; | |
params->B = b; | |
params->IPIV = (fortran_int*)ipiv; | |
params->N = N; | |
params->NRHS = NRHS; | |
params->LDA = N; | |
params->LDB = N; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_zgesv(GESV_PARAMS_t *params) | |
{ | |
/* memory block base is in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_zgesv(GESV_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(zgesv)(¶ms->N, ¶ms->NRHS, | |
params->A, ¶ms->LDA, | |
params->IPIV, | |
params->B, ¶ms->LDB, | |
&rv); | |
return rv; | |
} | |
static void | |
CDOUBLE_solve(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n, nrhs; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
nrhs = (fortran_int)dimensions[1]; | |
if (init_zgesv(¶ms, n, nrhs)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); | |
init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_CDOUBLE_matrix(params.A, args[0], &a_in); | |
linearize_CDOUBLE_matrix(params.B, args[1], &b_in); | |
not_ok =call_zgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CDOUBLE_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_zgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CDOUBLE_solve1(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_3 | |
n = (fortran_int)dimensions[0]; | |
if (init_zgesv(¶ms, n, 1)) { | |
LINEARIZE_DATA_t a_in, b_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&b_in, 1, n, 1, steps[2]); | |
init_linearize_data(&r_out, 1, n, 1, steps[3]); | |
BEGIN_OUTER_LOOP_3 | |
int not_ok; | |
linearize_CDOUBLE_matrix(params.A, args[0], &a_in); | |
linearize_CDOUBLE_matrix(params.B, args[1], &b_in); | |
not_ok = call_zgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CDOUBLE_matrix(args[2], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(args[2], &r_out); | |
} | |
END_OUTER_LOOP | |
release_zgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CDOUBLE_inv(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
GESV_PARAMS_t params; | |
fortran_int n; | |
int error_occurred = get_fp_invalid_and_clear(); | |
INIT_OUTER_LOOP_2 | |
n = (fortran_int)dimensions[0]; | |
if (init_zgesv(¶ms, n, n)) { | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_CDOUBLE_matrix(params.A, args[0], &a_in); | |
identity_CDOUBLE_matrix(params.B, n); | |
not_ok = call_zgesv(¶ms); | |
if (!not_ok) { | |
delinearize_CDOUBLE_matrix(args[1], params.B, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_zgesv(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/* -------------------------------------------------------------------------- */
/* Cholesky decomposition */
typedef struct potr_params_struct | |
{ | |
void *A; | |
fortran_int N; | |
fortran_int LDA; | |
char UPLO; | |
} POTR_PARAMS_t; | |
static inline int | |
init_spotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a; | |
mem_buff = malloc(N*N*sizeof(fortran_real)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
params->A = a; | |
params->N = N; | |
params->LDA = N; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_spotrf(POTR_PARAMS_t *params) | |
{ | |
/* memory block base in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_spotrf(POTR_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(spotrf)(¶ms->UPLO, | |
¶ms->N, params->A, ¶ms->LDA, | |
&rv); | |
return rv; | |
} | |
static void | |
FLOAT_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
{ | |
POTR_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_2 | |
assert(uplo == 'L'); | |
n = (fortran_int)dimensions[0]; | |
if (init_spotrf(¶ms, uplo, n)) | |
{ | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_FLOAT_matrix(params.A, args[0], &a_in); | |
not_ok = call_spotrf(¶ms); | |
if (!not_ok) { | |
triu_FLOAT_matrix(params.A, params.N); | |
delinearize_FLOAT_matrix(args[1], params.A, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_FLOAT_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_spotrf(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
FLOAT_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_cholesky('L', args, dimensions, steps); | |
} | |
static inline int | |
init_dpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a; | |
mem_buff = malloc(N*N*sizeof(fortran_doublereal)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
params->A = a; | |
params->N = N; | |
params->LDA = N; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_dpotrf(POTR_PARAMS_t *params) | |
{ | |
/* memory block base in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_dpotrf(POTR_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(dpotrf)(¶ms->UPLO, | |
¶ms->N, params->A, ¶ms->LDA, | |
&rv); | |
return rv; | |
} | |
static void | |
DOUBLE_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
{ | |
POTR_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_2 | |
assert(uplo == 'L'); | |
n = (fortran_int)dimensions[0]; | |
if (init_dpotrf(¶ms, uplo, n)) | |
{ | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_DOUBLE_matrix(params.A, args[0], &a_in); | |
not_ok = call_dpotrf(¶ms); | |
if (!not_ok) { | |
triu_DOUBLE_matrix(params.A, params.N); | |
delinearize_DOUBLE_matrix(args[1], params.A, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_DOUBLE_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_dpotrf(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
DOUBLE_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_cholesky('L', args, dimensions, steps); | |
} | |
static inline int | |
init_cpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a; | |
mem_buff = malloc(N*N*sizeof(fortran_complex)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
params->A = a; | |
params->N = N; | |
params->LDA = N; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_cpotrf(POTR_PARAMS_t *params) | |
{ | |
/* memory block base in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_cpotrf(POTR_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(cpotrf)(¶ms->UPLO, | |
¶ms->N, params->A, ¶ms->LDA, | |
&rv); | |
return rv; | |
} | |
static void | |
CFLOAT_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
{ | |
POTR_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_2 | |
assert(uplo == 'L'); | |
n = (fortran_int)dimensions[0]; | |
if (init_cpotrf(¶ms, uplo, n)) | |
{ | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_CFLOAT_matrix(params.A, args[0], &a_in); | |
not_ok = call_cpotrf(¶ms); | |
if (!not_ok) { | |
triu_CFLOAT_matrix(params.A, params.N); | |
delinearize_CFLOAT_matrix(args[1], params.A, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CFLOAT_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_cpotrf(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CFLOAT_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CFLOAT_cholesky('L', args, dimensions, steps); | |
} | |
static inline int | |
init_zpotrf(POTR_PARAMS_t *params, char UPLO, fortran_int N) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *a; | |
mem_buff = malloc(N*N*sizeof(fortran_doublecomplex)); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
params->A = a; | |
params->N = N; | |
params->LDA = N; | |
params->UPLO = UPLO; | |
return 1; | |
error: | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline void | |
release_zpotrf(POTR_PARAMS_t *params) | |
{ | |
/* memory block base in A */ | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline fortran_int | |
call_zpotrf(POTR_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(zpotrf)(¶ms->UPLO, | |
¶ms->N, params->A, ¶ms->LDA, | |
&rv); | |
return rv; | |
} | |
static void | |
CDOUBLE_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps) | |
{ | |
POTR_PARAMS_t params; | |
int error_occurred = get_fp_invalid_and_clear(); | |
fortran_int n; | |
INIT_OUTER_LOOP_2 | |
assert(uplo == 'L'); | |
n = (fortran_int)dimensions[0]; | |
if (init_zpotrf(¶ms, uplo, n)) | |
{ | |
LINEARIZE_DATA_t a_in, r_out; | |
init_linearize_data(&a_in, n, n, steps[1], steps[0]); | |
init_linearize_data(&r_out, n, n, steps[3], steps[2]); | |
BEGIN_OUTER_LOOP_2 | |
int not_ok; | |
linearize_CDOUBLE_matrix(params.A, args[0], &a_in); | |
not_ok = call_zpotrf(¶ms); | |
if (!not_ok) { | |
triu_CDOUBLE_matrix(params.A, params.N); | |
delinearize_CDOUBLE_matrix(args[1], params.A, &r_out); | |
} else { | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(args[1], &r_out); | |
} | |
END_OUTER_LOOP | |
release_zpotrf(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CDOUBLE_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_cholesky('L', args, dimensions, steps); | |
} | |
/* -------------------------------------------------------------------------- */
/* eig family */
typedef struct geev_params_struct { | |
void *A; | |
void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ | |
void *WI; | |
void *VLR; /* REAL VL buffers for _geev where _ is s, d */ | |
void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */ | |
void *WORK; | |
void *W; /* final w */ | |
void *VL; /* final vl */ | |
void *VR; /* final vr */ | |
fortran_int N; | |
fortran_int LDA; | |
fortran_int LDVL; | |
fortran_int LDVR; | |
fortran_int LWORK; | |
char JOBVL; | |
char JOBVR; | |
} GEEV_PARAMS_t; | |
static inline void | |
dump_geev_params(const char *name, GEEV_PARAMS_t* params) | |
{ | |
TRACE_TXT("\n%s\n" | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %p\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %d\n"\ | |
"\t%10s: %c\n"\ | |
"\t%10s: %c\n", | |
name, | |
"A", params->A, | |
"WR", params->WR, | |
"WI", params->WI, | |
"VLR", params->VLR, | |
"VRR", params->VRR, | |
"WORK", params->WORK, | |
"W", params->W, | |
"VL", params->VL, | |
"VR", params->VR, | |
"N", (int)params->N, | |
"LDA", (int)params->LDA, | |
"LDVL", (int)params->LDVL, | |
"LDVR", (int)params->LDVR, | |
"LWORK", (int)params->LWORK, | |
"JOBVL", params->JOBVL, | |
"JOBVR", params->JOBVR); | |
} | |
static inline int | |
init_sgeev(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) | |
{ | |
npy_uint8 *mem_buff=NULL; | |
npy_uint8 *mem_buff2=NULL; | |
npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; | |
size_t a_size = n*n*sizeof(float); | |
size_t wr_size = n*sizeof(float); | |
size_t wi_size = n*sizeof(float); | |
size_t vlr_size = jobvl=='V' ? n*n*sizeof(float) : 0; | |
size_t vrr_size = jobvr=='V' ? n*n*sizeof(float) : 0; | |
size_t w_size = wr_size*2; | |
size_t vl_size = vlr_size*2; | |
size_t vr_size = vrr_size*2; | |
size_t work_count = 0; | |
float work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
/* allocate data for known sizes (all but work) */ | |
mem_buff = malloc(a_size + wr_size + wi_size + | |
vlr_size + vrr_size + | |
w_size + vl_size + vr_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
wr = a + a_size; | |
wi = wr + wr_size; | |
vlr = wi + wi_size; | |
vrr = vlr + vlr_size; | |
w = vrr + vrr_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
LAPACK(sgeev)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)wr, (void *)wi, | |
(void *)vl, &n, (void *)vr, &n, | |
&work_size_query, &do_size_query, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t)work_size_query; | |
mem_buff2 = malloc(work_count*sizeof(float)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = wr; | |
params->WI = wi; | |
params->VLR = vlr; | |
params->VRR = vrr; | |
params->WORK = work; | |
params->W = w; | |
params->VL = vl; | |
params->VR = vr; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_sgeev(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(sgeev)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->WR, params->WI, | |
params->VLR, ¶ms->LDVL, | |
params->VRR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
&rv); | |
return rv; | |
} | |
static inline void | |
mk_FLOAT_complex_array_from_real(COMPLEX_t *c, const float *re, size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = 0.0f; | |
} | |
} | |
static inline void | |
mk_FLOAT_complex_array(COMPLEX_t *c, | |
const float *re, | |
const float *im, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = im[iter]; | |
} | |
} | |
static inline void | |
mk_FLOAT_complex_array_conjugate_pair(COMPLEX_t *c, | |
const float *r, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
float re = r[iter]; | |
float im = r[iter+n]; | |
c[iter].array[0] = re; | |
c[iter].array[1] = im; | |
c[iter+n].array[0] = re; | |
c[iter+n].array[1] = -im; | |
} | |
} | |
/* | |
* make the complex eigenvectors from the real array produced by sgeev/zgeev. | |
* c is the array where the results will be left. | |
* r is the source array of reals produced by sgeev/zgeev | |
* i is the eigenvalue imaginary part produced by sgeev/zgeev | |
* n is so that the order of the matrix is n by n | |
*/ | |
static inline void | |
mk_sgeev_complex_eigenvectors(COMPLEX_t *c, | |
const float *r, | |
const float *i, | |
size_t n) | |
{ | |
size_t iter = 0; | |
while (iter < n) | |
{ | |
if (i[iter] == 0.0f) { | |
/* eigenvalue was real, eigenvectors as well... */ | |
mk_FLOAT_complex_array_from_real(c, r, n); | |
c += n; | |
r += n; | |
iter ++; | |
} else { | |
/* eigenvalue was complex, generate a pair of eigenvectors */ | |
mk_FLOAT_complex_array_conjugate_pair(c, r, n); | |
c += 2*n; | |
r += 2*n; | |
iter += 2; | |
} | |
} | |
} | |
static inline void | |
process_sgeev_results(GEEV_PARAMS_t *params) | |
{ | |
/* REAL versions of geev need the results to be translated | |
* into complex versions. This is the way to deal with imaginary | |
* results. In our gufuncs we will always return complex arrays! | |
*/ | |
mk_FLOAT_complex_array(params->W, params->WR, params->WI, params->N); | |
/* handle the eigenvectors */ | |
if ('V' == params->JOBVL) { | |
mk_sgeev_complex_eigenvectors(params->VL, params->VLR, | |
params->WI, params->N); | |
} | |
if ('V' == params->JOBVR) { | |
mk_sgeev_complex_eigenvectors(params->VR, params->VRR, | |
params->WI, params->N); | |
} | |
} | |
static inline int | |
init_dgeev(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) | |
{ | |
npy_uint8 *mem_buff=NULL; | |
npy_uint8 *mem_buff2=NULL; | |
npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; | |
size_t a_size = n*n*sizeof(double); | |
size_t wr_size = n*sizeof(double); | |
size_t wi_size = n*sizeof(double); | |
size_t vlr_size = jobvl=='V' ? n*n*sizeof(double) : 0; | |
size_t vrr_size = jobvr=='V' ? n*n*sizeof(double) : 0; | |
size_t w_size = wr_size*2; | |
size_t vl_size = vlr_size*2; | |
size_t vr_size = vrr_size*2; | |
size_t work_count = 0; | |
double work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
/* allocate data for known sizes (all but work) */ | |
mem_buff = malloc(a_size + wr_size + wi_size + | |
vlr_size + vrr_size + | |
w_size + vl_size + vr_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
wr = a + a_size; | |
wi = wr + wr_size; | |
vlr = wi + wi_size; | |
vrr = vlr + vlr_size; | |
w = vrr + vrr_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
LAPACK(dgeev)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)wr, (void *)wi, | |
(void *)vl, &n, (void *)vr, &n, | |
&work_size_query, &do_size_query, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t)work_size_query; | |
mem_buff2 = malloc(work_count*sizeof(double)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = wr; | |
params->WI = wi; | |
params->VLR = vlr; | |
params->VRR = vrr; | |
params->WORK = work; | |
params->W = w; | |
params->VL = vl; | |
params->VR = vr; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_dgeev(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(dgeev)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->WR, params->WI, | |
params->VLR, ¶ms->LDVL, | |
params->VRR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
&rv); | |
return rv; | |
} | |
static inline void | |
mk_DOUBLE_complex_array_from_real(DOUBLECOMPLEX_t *c, const double *re, size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = 0.0; | |
} | |
} | |
static inline void | |
mk_DOUBLE_complex_array(DOUBLECOMPLEX_t *c, | |
const double *re, | |
const double *im, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
c[iter].array[0] = re[iter]; | |
c[iter].array[1] = im[iter]; | |
} | |
} | |
static inline void | |
mk_DOUBLE_complex_array_conjugate_pair(DOUBLECOMPLEX_t *c, | |
const double *r, | |
size_t n) | |
{ | |
size_t iter; | |
for (iter = 0; iter < n; ++iter) { | |
double re = r[iter]; | |
double im = r[iter+n]; | |
c[iter].array[0] = re; | |
c[iter].array[1] = im; | |
c[iter+n].array[0] = re; | |
c[iter+n].array[1] = -im; | |
} | |
} | |
/* | |
* make the complex eigenvectors from the real array produced by sgeev/zgeev. | |
* c is the array where the results will be left. | |
* r is the source array of reals produced by sgeev/zgeev | |
* i is the eigenvalue imaginary part produced by sgeev/zgeev | |
* n is so that the order of the matrix is n by n | |
*/ | |
static inline void | |
mk_dgeev_complex_eigenvectors(DOUBLECOMPLEX_t *c, | |
const double *r, | |
const double *i, | |
size_t n) | |
{ | |
size_t iter = 0; | |
while (iter < n) | |
{ | |
if (i[iter] == 0.0) { | |
/* eigenvalue was real, eigenvectors as well... */ | |
mk_DOUBLE_complex_array_from_real(c, r, n); | |
c += n; | |
r += n; | |
iter ++; | |
} else { | |
/* eigenvalue was complex, generate a pair of eigenvectors */ | |
mk_DOUBLE_complex_array_conjugate_pair(c, r, n); | |
c += 2*n; | |
r += 2*n; | |
iter += 2; | |
} | |
} | |
} | |
static inline void | |
process_dgeev_results(GEEV_PARAMS_t *params) | |
{ | |
/* REAL versions of geev need the results to be translated | |
* into complex versions. This is the way to deal with imaginary | |
* results. In our gufuncs we will always return complex arrays! | |
*/ | |
mk_DOUBLE_complex_array(params->W, params->WR, params->WI, params->N); | |
/* handle the eigenvectors */ | |
if ('V' == params->JOBVL) { | |
mk_dgeev_complex_eigenvectors(params->VL, params->VLR, | |
params->WI, params->N); | |
} | |
if ('V' == params->JOBVR) { | |
mk_dgeev_complex_eigenvectors(params->VR, params->VRR, | |
params->WI, params->N); | |
} | |
} | |
static inline int | |
init_cgeev(GEEV_PARAMS_t* params, | |
char jobvl, | |
char jobvr, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *w, *vl, *vr, *work, *rwork; | |
size_t a_size = n*n*sizeof(fortran_complex); | |
size_t w_size = n*sizeof(fortran_complex); | |
size_t vl_size = jobvl=='V'? n*n*sizeof(fortran_complex) : 0; | |
size_t vr_size = jobvr=='V'? n*n*sizeof(fortran_complex) : 0; | |
size_t rwork_size = 2*n*sizeof(float); | |
size_t work_count = 0; | |
COMPLEX_t work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; | |
mem_buff = malloc(total_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = a + a_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
rwork = vr + vr_size; | |
LAPACK(cgeev)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)w, | |
(void *)vl, &n, (void *)vr, &n, | |
(void *)&work_size_query, &do_size_query, | |
(void *)rwork, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t) work_size_query.array[0]; | |
mem_buff2 = malloc(work_count*sizeof(fortran_complex)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = rwork; | |
params->WI = NULL; | |
params->VLR = NULL; | |
params->VRR = NULL; | |
params->VL = vl; | |
params->VR = vr; | |
params->WORK = work; | |
params->W = w; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_cgeev(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(cgeev)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->W, | |
params->VL, ¶ms->LDVL, | |
params->VR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
params->WR, /* actually RWORK */ | |
&rv); | |
return rv; | |
} | |
static inline void | |
process_cgeev_results(GEEV_PARAMS_t *NPY_UNUSED(params)) | |
{ | |
/* nothing to do here, complex versions are ready to copy out */ | |
} | |
static inline int | |
init_zgeev(GEEV_PARAMS_t* params, | |
char jobvl, | |
char jobvr, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *w, *vl, *vr, *work, *rwork; | |
size_t a_size = n*n*sizeof(fortran_doublecomplex); | |
size_t w_size = n*sizeof(fortran_doublecomplex); | |
size_t vl_size = jobvl=='V'? n*n*sizeof(fortran_doublecomplex) : 0; | |
size_t vr_size = jobvr=='V'? n*n*sizeof(fortran_doublecomplex) : 0; | |
size_t rwork_size = 2*n*sizeof(double); | |
size_t work_count = 0; | |
DOUBLECOMPLEX_t work_size_query; | |
fortran_int do_size_query = -1; | |
fortran_int rv; | |
size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; | |
mem_buff = malloc(total_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
w = a + a_size; | |
vl = w + w_size; | |
vr = vl + vl_size; | |
rwork = vr + vr_size; | |
LAPACK(zgeev)(&jobvl, &jobvr, &n, | |
(void *)a, &n, (void *)w, | |
(void *)vl, &n, (void *)vr, &n, | |
(void *)&work_size_query, &do_size_query, | |
(void *)rwork, | |
&rv); | |
if (0 != rv) | |
goto error; | |
work_count = (size_t) work_size_query.array[0]; | |
mem_buff2 = malloc(work_count*sizeof(fortran_doublecomplex)); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->WR = rwork; | |
params->WI = NULL; | |
params->VLR = NULL; | |
params->VRR = NULL; | |
params->VL = vl; | |
params->VR = vr; | |
params->WORK = work; | |
params->W = w; | |
params->N = n; | |
params->LDA = n; | |
params->LDVL = n; | |
params->LDVR = n; | |
params->LWORK = (fortran_int)work_count; | |
params->JOBVL = jobvl; | |
params->JOBVR = jobvr; | |
return 1; | |
error: | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_zgeev(GEEV_PARAMS_t* params) | |
{ | |
fortran_int rv; | |
LAPACK(zgeev)(¶ms->JOBVL, ¶ms->JOBVR, | |
¶ms->N, params->A, ¶ms->LDA, | |
params->W, | |
params->VL, ¶ms->LDVL, | |
params->VR, ¶ms->LDVR, | |
params->WORK, ¶ms->LWORK, | |
params->WR, /* actually RWORK */ | |
&rv); | |
return rv; | |
} | |
static inline void | |
process_zgeev_results(GEEV_PARAMS_t *NPY_UNUSED(params)) | |
{ | |
/* nothing to do here, complex versions are ready to copy out */ | |
} | |
static inline void | |
release_sgeev(GEEV_PARAMS_t *params) | |
{ | |
free(params->WORK); | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
FLOAT_eig_wrapper(char JOBVL, | |
char JOBVR, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = 2; | |
int error_occurred = get_fp_invalid_and_clear(); | |
GEEV_PARAMS_t geev_params; | |
assert(JOBVL == 'N'); | |
STACK_TRACE; | |
op_count += 'V'==JOBVL?1:0; | |
op_count += 'V'==JOBVR?1:0; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_sgeev(&geev_params, | |
JOBVL, JOBVR, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t a_in; | |
LINEARIZE_DATA_t w_out; | |
LINEARIZE_DATA_t vl_out; | |
LINEARIZE_DATA_t vr_out; | |
init_linearize_data(&a_in, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
init_linearize_data(&w_out, | |
1, geev_params.N, | |
0, steps[0]); | |
steps += 1; | |
if ('V' == geev_params.JOBVL) { | |
init_linearize_data(&vl_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
} | |
if ('V' == geev_params.JOBVR) { | |
init_linearize_data(&vr_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
char **arg_iter = args; | |
/* copy the matrix in */ | |
linearize_FLOAT_matrix(geev_params.A, *arg_iter++, &a_in); | |
not_ok = call_sgeev(&geev_params); | |
if (!not_ok) { | |
process_sgeev_results(&geev_params); | |
delinearize_CFLOAT_matrix(*arg_iter++, | |
geev_params.W, | |
&w_out); | |
if ('V' == geev_params.JOBVL) | |
delinearize_CFLOAT_matrix(*arg_iter++, | |
geev_params.VL, | |
&vl_out); | |
if ('V' == geev_params.JOBVR) | |
delinearize_CFLOAT_matrix(*arg_iter++, | |
geev_params.VR, | |
&vr_out); | |
} else { | |
/* geev failed */ | |
error_occurred = 1; | |
nan_CFLOAT_matrix(*arg_iter++, &w_out); | |
if ('V' == geev_params.JOBVL) | |
nan_CFLOAT_matrix(*arg_iter++, &vl_out); | |
if ('V' == geev_params.JOBVR) | |
nan_CFLOAT_matrix(*arg_iter++, &vr_out); | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_sgeev(&geev_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
FLOAT_eig(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_eig_wrapper('N', 'V', args, dimensions, steps); | |
} | |
static void | |
FLOAT_eigvals(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_eig_wrapper('N', 'N', args, dimensions, steps); | |
} | |
static inline void | |
release_dgeev(GEEV_PARAMS_t *params) | |
{ | |
free(params->WORK); | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
DOUBLE_eig_wrapper(char JOBVL, | |
char JOBVR, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = 2; | |
int error_occurred = get_fp_invalid_and_clear(); | |
GEEV_PARAMS_t geev_params; | |
assert(JOBVL == 'N'); | |
STACK_TRACE; | |
op_count += 'V'==JOBVL?1:0; | |
op_count += 'V'==JOBVR?1:0; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_dgeev(&geev_params, | |
JOBVL, JOBVR, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t a_in; | |
LINEARIZE_DATA_t w_out; | |
LINEARIZE_DATA_t vl_out; | |
LINEARIZE_DATA_t vr_out; | |
init_linearize_data(&a_in, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
init_linearize_data(&w_out, | |
1, geev_params.N, | |
0, steps[0]); | |
steps += 1; | |
if ('V' == geev_params.JOBVL) { | |
init_linearize_data(&vl_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
} | |
if ('V' == geev_params.JOBVR) { | |
init_linearize_data(&vr_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
char **arg_iter = args; | |
/* copy the matrix in */ | |
linearize_DOUBLE_matrix(geev_params.A, *arg_iter++, &a_in); | |
not_ok = call_dgeev(&geev_params); | |
if (!not_ok) { | |
process_dgeev_results(&geev_params); | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.W, | |
&w_out); | |
if ('V' == geev_params.JOBVL) | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.VL, | |
&vl_out); | |
if ('V' == geev_params.JOBVR) | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.VR, | |
&vr_out); | |
} else { | |
/* geev failed */ | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(*arg_iter++, &w_out); | |
if ('V' == geev_params.JOBVL) | |
nan_CDOUBLE_matrix(*arg_iter++, &vl_out); | |
if ('V' == geev_params.JOBVR) | |
nan_CDOUBLE_matrix(*arg_iter++, &vr_out); | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_dgeev(&geev_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
DOUBLE_eig(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_eig_wrapper('N', 'V', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_eigvals(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_eig_wrapper('N', 'N', args, dimensions, steps); | |
} | |
static inline void | |
release_zgeev(GEEV_PARAMS_t *params) | |
{ | |
free(params->WORK); | |
free(params->A); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
CDOUBLE_eig_wrapper(char JOBVL, | |
char JOBVR, | |
char**args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = 2; | |
int error_occurred = get_fp_invalid_and_clear(); | |
GEEV_PARAMS_t geev_params; | |
assert(JOBVL == 'N'); | |
STACK_TRACE; | |
op_count += 'V'==JOBVL?1:0; | |
op_count += 'V'==JOBVR?1:0; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_zgeev(&geev_params, | |
JOBVL, JOBVR, | |
(fortran_int)dimensions[0])) { | |
LINEARIZE_DATA_t a_in; | |
LINEARIZE_DATA_t w_out; | |
LINEARIZE_DATA_t vl_out; | |
LINEARIZE_DATA_t vr_out; | |
init_linearize_data(&a_in, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
init_linearize_data(&w_out, | |
1, geev_params.N, | |
0, steps[0]); | |
steps += 1; | |
if ('V' == geev_params.JOBVL) { | |
init_linearize_data(&vl_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
steps += 2; | |
} | |
if ('V' == geev_params.JOBVR) { | |
init_linearize_data(&vr_out, | |
geev_params.N, geev_params.N, | |
steps[1], steps[0]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
char **arg_iter = args; | |
/* copy the matrix in */ | |
linearize_CDOUBLE_matrix(geev_params.A, *arg_iter++, &a_in); | |
not_ok = call_zgeev(&geev_params); | |
if (!not_ok) { | |
process_zgeev_results(&geev_params); | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.W, | |
&w_out); | |
if ('V' == geev_params.JOBVL) | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.VL, | |
&vl_out); | |
if ('V' == geev_params.JOBVR) | |
delinearize_CDOUBLE_matrix(*arg_iter++, | |
geev_params.VR, | |
&vr_out); | |
} else { | |
/* geev failed */ | |
error_occurred = 1; | |
nan_CDOUBLE_matrix(*arg_iter++, &w_out); | |
if ('V' == geev_params.JOBVL) | |
nan_CDOUBLE_matrix(*arg_iter++, &vl_out); | |
if ('V' == geev_params.JOBVR) | |
nan_CDOUBLE_matrix(*arg_iter++, &vr_out); | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_zgeev(&geev_params); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
static void | |
CDOUBLE_eig(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eig_wrapper('N', 'V', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_eigvals(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_eig_wrapper('N', 'N', args, dimensions, steps); | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* singular value decomposition */ | |
typedef struct gessd_params_struct | |
{ | |
void *A; | |
void *S; | |
void *U; | |
void *VT; | |
void *WORK; | |
void *RWORK; | |
void *IWORK; | |
fortran_int M; | |
fortran_int N; | |
fortran_int LDA; | |
fortran_int LDU; | |
fortran_int LDVT; | |
fortran_int LWORK; | |
char JOBZ; | |
} GESDD_PARAMS_t; | |
static inline void | |
dump_gesdd_params(const char *name, | |
GESDD_PARAMS_t *params) | |
{ | |
TRACE_TXT("\n%s:\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18p\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %18d\n"\ | |
"%14s: %15c'%c'\n", | |
name, | |
"A", params->A, | |
"S", params->S, | |
"U", params->U, | |
"VT", params->VT, | |
"WORK", params->WORK, | |
"RWORK", params->RWORK, | |
"IWORK", params->IWORK, | |
"M", (int)params->M, | |
"N", (int)params->N, | |
"LDA", (int)params->LDA, | |
"LDU", (int)params->LDU, | |
"LDVT", (int)params->LDVT, | |
"LWORK", (int)params->LWORK, | |
"JOBZ", ' ',params->JOBZ); | |
} | |
static inline int | |
compute_urows_vtcolumns(char jobz, | |
fortran_int m, fortran_int n, | |
fortran_int *urows, fortran_int *vtcolumns) | |
{ | |
fortran_int min_m_n = m<n?m:n; | |
switch(jobz) | |
{ | |
case 'N': | |
*urows = 0; | |
*vtcolumns = 0; | |
break; | |
case 'A': | |
*urows = m; | |
*vtcolumns = n; | |
break; | |
case 'S': | |
{ | |
*urows = min_m_n; | |
*vtcolumns = min_m_n; | |
} | |
break; | |
default: | |
return 0; | |
} | |
return 1; | |
} | |
static inline int | |
init_sgesdd(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *s, *u, *vt, *work, *iwork; | |
size_t a_size = (size_t)m*(size_t)n*sizeof(fortran_real); | |
fortran_int min_m_n = m<n?m:n; | |
size_t s_size = ((size_t)min_m_n)*sizeof(fortran_real); | |
fortran_int u_row_count, vt_column_count; | |
size_t u_size, vt_size; | |
fortran_int work_count; | |
size_t work_size; | |
size_t iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
u_size = ((size_t)u_row_count)*m*sizeof(fortran_real); | |
vt_size = n*((size_t)vt_column_count)*sizeof(fortran_real); | |
mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
iwork = vt + vt_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
fortran_real work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(sgesdd)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)work_size_query; | |
work_size = (size_t)work_count * sizeof(fortran_real); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->M = m; | |
params->N = n; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = NULL; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff); | |
free(mem_buff2); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_sgesdd(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(sgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
static inline int | |
init_dgesdd(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL; | |
npy_uint8 *mem_buff2 = NULL; | |
npy_uint8 *a, *s, *u, *vt, *work, *iwork; | |
size_t a_size = (size_t)m*(size_t)n*sizeof(fortran_doublereal); | |
fortran_int min_m_n = m<n?m:n; | |
size_t s_size = ((size_t)min_m_n)*sizeof(fortran_doublereal); | |
fortran_int u_row_count, vt_column_count; | |
size_t u_size, vt_size; | |
fortran_int work_count; | |
size_t work_size; | |
size_t iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
u_size = ((size_t)u_row_count)*m*sizeof(fortran_doublereal); | |
vt_size = n*((size_t)vt_column_count)*sizeof(fortran_doublereal); | |
mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
iwork = vt + vt_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
fortran_doublereal work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(dgesdd)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)work_size_query; | |
work_size = (size_t)work_count * sizeof(fortran_doublereal); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->M = m; | |
params->N = n; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = NULL; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff); | |
free(mem_buff2); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_dgesdd(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(dgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
static inline int | |
init_cgesdd(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; | |
npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; | |
size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; | |
fortran_int u_row_count, vt_column_count, work_count; | |
fortran_int min_m_n = m<n?m:n; | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
a_size = ((size_t)m)*((size_t)n)*sizeof(fortran_complex); | |
s_size = ((size_t)min_m_n)*sizeof(fortran_real); | |
u_size = ((size_t)u_row_count)*m*sizeof(fortran_complex); | |
vt_size = n*((size_t)vt_column_count)*sizeof(fortran_complex); | |
rwork_size = 'N'==jobz? | |
7*((size_t)min_m_n) : | |
(5*(size_t)min_m_n*(size_t)min_m_n + 5*(size_t)min_m_n); | |
rwork_size *= sizeof(fortran_complex); | |
iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
mem_buff = malloc(a_size + | |
s_size + | |
u_size + | |
vt_size + | |
rwork_size + | |
iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
rwork = vt + vt_size; | |
iwork = rwork + rwork_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
fortran_complex work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(cgesdd)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)rwork, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)((COMPLEX_t*)&work_size_query)->array[0]; | |
work_size = (size_t)work_count * sizeof(fortran_complex); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_cgesdd(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(cgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
static inline int | |
init_zgesdd(GESDD_PARAMS_t *params, | |
char jobz, | |
fortran_int m, | |
fortran_int n) | |
{ | |
npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; | |
npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; | |
size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; | |
fortran_int u_row_count, vt_column_count, work_count; | |
fortran_int min_m_n = m<n?m:n; | |
if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count)) | |
goto error; | |
a_size = ((size_t)m)*((size_t)n)*sizeof(fortran_doublecomplex); | |
s_size = ((size_t)min_m_n)*sizeof(fortran_doublereal); | |
u_size = ((size_t)u_row_count)*m*sizeof(fortran_doublecomplex); | |
vt_size = n*((size_t)vt_column_count)*sizeof(fortran_doublecomplex); | |
rwork_size = 'N'==jobz? | |
7*((size_t)min_m_n) : | |
(5*(size_t)min_m_n*(size_t)min_m_n + 5*(size_t)min_m_n); | |
rwork_size *= sizeof(fortran_doublecomplex); | |
iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int); | |
mem_buff = malloc(a_size + | |
s_size + | |
u_size + | |
vt_size + | |
rwork_size + | |
iwork_size); | |
if (!mem_buff) | |
goto error; | |
a = mem_buff; | |
s = a + a_size; | |
u = s + s_size; | |
vt = u + u_size; | |
rwork = vt + vt_size; | |
iwork = rwork + rwork_size; | |
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ | |
vt_column_count = vt_column_count < 1? 1 : vt_column_count; | |
{ | |
/* compute optimal work size */ | |
fortran_doublecomplex work_size_query; | |
fortran_int do_query = -1; | |
fortran_int rv; | |
LAPACK(zgesdd)(&jobz, &m, &n, | |
(void*)a, &m, (void*)s, (void*)u, &m, | |
(void*)vt, &vt_column_count, | |
&work_size_query, &do_query, | |
(void*)rwork, | |
(void*)iwork, &rv); | |
if (0!=rv) | |
goto error; | |
work_count = (fortran_int)((DOUBLECOMPLEX_t*)&work_size_query)->array[0]; | |
work_size = (size_t)work_count * sizeof(fortran_doublecomplex); | |
} | |
mem_buff2 = malloc(work_size); | |
if (!mem_buff2) | |
goto error; | |
work = mem_buff2; | |
params->A = a; | |
params->S = s; | |
params->U = u; | |
params->VT = vt; | |
params->WORK = work; | |
params->RWORK = rwork; | |
params->IWORK = iwork; | |
params->M = m; | |
params->N = n; | |
params->LDA = m; | |
params->LDU = m; | |
params->LDVT = vt_column_count; | |
params->LWORK = work_count; | |
params->JOBZ = jobz; | |
return 1; | |
error: | |
TRACE_TXT("%s failed init\n", __FUNCTION__); | |
free(mem_buff2); | |
free(mem_buff); | |
memset(params, 0, sizeof(*params)); | |
return 0; | |
} | |
static inline fortran_int | |
call_zgesdd(GESDD_PARAMS_t *params) | |
{ | |
fortran_int rv; | |
LAPACK(zgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, | |
params->A, ¶ms->LDA, | |
params->S, | |
params->U, ¶ms->LDU, | |
params->VT, ¶ms->LDVT, | |
params->WORK, ¶ms->LWORK, | |
params->RWORK, | |
params->IWORK, | |
&rv); | |
return rv; | |
} | |
static inline void | |
release_sgesdd(GESDD_PARAMS_t* params) | |
{ | |
/* A and WORK contain allocated blocks */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
FLOAT_svd_wrapper(char JOBZ, | |
char **args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
int error_occurred = get_fp_invalid_and_clear(); | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:4; | |
GESDD_PARAMS_t params; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_sgesdd(¶ms, | |
JOBZ, | |
(fortran_int)dimensions[0], | |
(fortran_int)dimensions[1])) { | |
LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
if ('N' == params.JOBZ) { | |
/* only the singular values are wanted */ | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
} else { | |
fortran_int u_columns, v_rows; | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
if ('S' == params.JOBZ) { | |
u_columns = min_m_n; | |
v_rows = min_m_n; | |
} else { | |
u_columns = params.M; | |
v_rows = params.N; | |
} | |
init_linearize_data(&u_out, | |
u_columns, params.M, | |
steps[3], steps[2]); | |
init_linearize_data(&s_out, | |
1, min_m_n, | |
0, steps[4]); | |
init_linearize_data(&v_out, | |
params.N, v_rows, | |
steps[6], steps[5]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_FLOAT_matrix(params.A, args[0], &a_in); | |
not_ok = call_sgesdd(¶ms); | |
if (!not_ok) { | |
if ('N' == params.JOBZ) { | |
delinearize_FLOAT_matrix(args[1], params.S, &s_out); | |
} else { | |
delinearize_FLOAT_matrix(args[1], params.U, &u_out); | |
delinearize_FLOAT_matrix(args[2], params.S, &s_out); | |
delinearize_FLOAT_matrix(args[3], params.VT, &v_out); | |
} | |
} else { | |
error_occurred = 1; | |
if ('N' == params.JOBZ) { | |
nan_FLOAT_matrix(args[1], &s_out); | |
} else { | |
nan_FLOAT_matrix(args[1], &u_out); | |
nan_FLOAT_matrix(args[2], &s_out); | |
nan_FLOAT_matrix(args[3], &v_out); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_sgesdd(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat*/
/* svd gufunc entry points */
/**begin repeat
   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
 */
static void | |
FLOAT_svd_N(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_svd_wrapper('N', args, dimensions, steps); | |
} | |
static void | |
FLOAT_svd_S(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_svd_wrapper('S', args, dimensions, steps); | |
} | |
static void | |
FLOAT_svd_A(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
FLOAT_svd_wrapper('A', args, dimensions, steps); | |
} | |
static inline void | |
release_dgesdd(GESDD_PARAMS_t* params) | |
{ | |
/* A and WORK contain allocated blocks */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
DOUBLE_svd_wrapper(char JOBZ, | |
char **args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
int error_occurred = get_fp_invalid_and_clear(); | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:4; | |
GESDD_PARAMS_t params; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_dgesdd(¶ms, | |
JOBZ, | |
(fortran_int)dimensions[0], | |
(fortran_int)dimensions[1])) { | |
LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
if ('N' == params.JOBZ) { | |
/* only the singular values are wanted */ | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
} else { | |
fortran_int u_columns, v_rows; | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
if ('S' == params.JOBZ) { | |
u_columns = min_m_n; | |
v_rows = min_m_n; | |
} else { | |
u_columns = params.M; | |
v_rows = params.N; | |
} | |
init_linearize_data(&u_out, | |
u_columns, params.M, | |
steps[3], steps[2]); | |
init_linearize_data(&s_out, | |
1, min_m_n, | |
0, steps[4]); | |
init_linearize_data(&v_out, | |
params.N, v_rows, | |
steps[6], steps[5]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_DOUBLE_matrix(params.A, args[0], &a_in); | |
not_ok = call_dgesdd(¶ms); | |
if (!not_ok) { | |
if ('N' == params.JOBZ) { | |
delinearize_DOUBLE_matrix(args[1], params.S, &s_out); | |
} else { | |
delinearize_DOUBLE_matrix(args[1], params.U, &u_out); | |
delinearize_DOUBLE_matrix(args[2], params.S, &s_out); | |
delinearize_DOUBLE_matrix(args[3], params.VT, &v_out); | |
} | |
} else { | |
error_occurred = 1; | |
if ('N' == params.JOBZ) { | |
nan_DOUBLE_matrix(args[1], &s_out); | |
} else { | |
nan_DOUBLE_matrix(args[1], &u_out); | |
nan_DOUBLE_matrix(args[2], &s_out); | |
nan_DOUBLE_matrix(args[3], &v_out); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_dgesdd(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat*/
/* svd gufunc entry points */
/**begin repeat
   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
 */
static void | |
DOUBLE_svd_N(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_svd_wrapper('N', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_svd_S(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_svd_wrapper('S', args, dimensions, steps); | |
} | |
static void | |
DOUBLE_svd_A(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
DOUBLE_svd_wrapper('A', args, dimensions, steps); | |
} | |
static inline void | |
release_cgesdd(GESDD_PARAMS_t* params) | |
{ | |
/* A and WORK contain allocated blocks */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
CFLOAT_svd_wrapper(char JOBZ, | |
char **args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
int error_occurred = get_fp_invalid_and_clear(); | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:4; | |
GESDD_PARAMS_t params; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_cgesdd(¶ms, | |
JOBZ, | |
(fortran_int)dimensions[0], | |
(fortran_int)dimensions[1])) { | |
LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
if ('N' == params.JOBZ) { | |
/* only the singular values are wanted */ | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
} else { | |
fortran_int u_columns, v_rows; | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
if ('S' == params.JOBZ) { | |
u_columns = min_m_n; | |
v_rows = min_m_n; | |
} else { | |
u_columns = params.M; | |
v_rows = params.N; | |
} | |
init_linearize_data(&u_out, | |
u_columns, params.M, | |
steps[3], steps[2]); | |
init_linearize_data(&s_out, | |
1, min_m_n, | |
0, steps[4]); | |
init_linearize_data(&v_out, | |
params.N, v_rows, | |
steps[6], steps[5]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_CFLOAT_matrix(params.A, args[0], &a_in); | |
not_ok = call_cgesdd(¶ms); | |
if (!not_ok) { | |
if ('N' == params.JOBZ) { | |
delinearize_FLOAT_matrix(args[1], params.S, &s_out); | |
} else { | |
delinearize_CFLOAT_matrix(args[1], params.U, &u_out); | |
delinearize_FLOAT_matrix(args[2], params.S, &s_out); | |
delinearize_CFLOAT_matrix(args[3], params.VT, &v_out); | |
} | |
} else { | |
error_occurred = 1; | |
if ('N' == params.JOBZ) { | |
nan_FLOAT_matrix(args[1], &s_out); | |
} else { | |
nan_CFLOAT_matrix(args[1], &u_out); | |
nan_FLOAT_matrix(args[2], &s_out); | |
nan_CFLOAT_matrix(args[3], &v_out); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_cgesdd(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat*/
/* svd gufunc entry points */
/**begin repeat
   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
 */
static void | |
CFLOAT_svd_N(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CFLOAT_svd_wrapper('N', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_svd_S(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CFLOAT_svd_wrapper('S', args, dimensions, steps); | |
} | |
static void | |
CFLOAT_svd_A(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CFLOAT_svd_wrapper('A', args, dimensions, steps); | |
} | |
static inline void | |
release_zgesdd(GESDD_PARAMS_t* params) | |
{ | |
/* A and WORK contain allocated blocks */ | |
free(params->A); | |
free(params->WORK); | |
memset(params, 0, sizeof(*params)); | |
} | |
static inline void | |
CDOUBLE_svd_wrapper(char JOBZ, | |
char **args, | |
npy_intp* dimensions, | |
npy_intp* steps) | |
{ | |
ptrdiff_t outer_steps[4]; | |
int error_occurred = get_fp_invalid_and_clear(); | |
size_t iter; | |
size_t outer_dim = *dimensions++; | |
size_t op_count = (JOBZ=='N')?2:4; | |
GESDD_PARAMS_t params; | |
for (iter=0; iter < op_count; ++iter) { | |
outer_steps[iter] = (ptrdiff_t) steps[iter]; | |
} | |
steps += op_count; | |
if (init_zgesdd(¶ms, | |
JOBZ, | |
(fortran_int)dimensions[0], | |
(fortran_int)dimensions[1])) { | |
LINEARIZE_DATA_t a_in, u_out, s_out, v_out; | |
init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]); | |
if ('N' == params.JOBZ) { | |
/* only the singular values are wanted */ | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]); | |
} else { | |
fortran_int u_columns, v_rows; | |
fortran_int min_m_n = params.M < params.N? params.M : params.N; | |
if ('S' == params.JOBZ) { | |
u_columns = min_m_n; | |
v_rows = min_m_n; | |
} else { | |
u_columns = params.M; | |
v_rows = params.N; | |
} | |
init_linearize_data(&u_out, | |
u_columns, params.M, | |
steps[3], steps[2]); | |
init_linearize_data(&s_out, | |
1, min_m_n, | |
0, steps[4]); | |
init_linearize_data(&v_out, | |
params.N, v_rows, | |
steps[6], steps[5]); | |
} | |
for (iter = 0; iter < outer_dim; ++iter) { | |
int not_ok; | |
/* copy the matrix in */ | |
linearize_CDOUBLE_matrix(params.A, args[0], &a_in); | |
not_ok = call_zgesdd(¶ms); | |
if (!not_ok) { | |
if ('N' == params.JOBZ) { | |
delinearize_DOUBLE_matrix(args[1], params.S, &s_out); | |
} else { | |
delinearize_CDOUBLE_matrix(args[1], params.U, &u_out); | |
delinearize_DOUBLE_matrix(args[2], params.S, &s_out); | |
delinearize_CDOUBLE_matrix(args[3], params.VT, &v_out); | |
} | |
} else { | |
error_occurred = 1; | |
if ('N' == params.JOBZ) { | |
nan_DOUBLE_matrix(args[1], &s_out); | |
} else { | |
nan_CDOUBLE_matrix(args[1], &u_out); | |
nan_DOUBLE_matrix(args[2], &s_out); | |
nan_CDOUBLE_matrix(args[3], &v_out); | |
} | |
} | |
update_pointers((npy_uint8**)args, outer_steps, op_count); | |
} | |
release_zgesdd(¶ms); | |
} | |
set_fp_invalid_or_clear(error_occurred); | |
} | |
/**end repeat*/
/* svd gufunc entry points */
/**begin repeat
   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
 */
static void | |
CDOUBLE_svd_N(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_svd_wrapper('N', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_svd_S(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_svd_wrapper('S', args, dimensions, steps); | |
} | |
static void | |
CDOUBLE_svd_A(char **args, | |
npy_intp *dimensions, | |
npy_intp *steps, | |
void *NPY_UNUSED(func)) | |
{ | |
CDOUBLE_svd_wrapper('A', args, dimensions, steps); | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* gufunc registration */ | |
/*
 * Sixteen NULL entries, passed as the per-loop "data" array of every gufunc
 * registered by addUfuncs.
 */
static void *array_of_nulls[] = {
    (void *)NULL,
    (void *)NULL,
    (void *)NULL,
    (void *)NULL,

    (void *)NULL,
    (void *)NULL,
    (void *)NULL,
    (void *)NULL,

    (void *)NULL,
    (void *)NULL,
    (void *)NULL,
    (void *)NULL,

    (void *)NULL,
    (void *)NULL,
    (void *)NULL,
    (void *)NULL
};
/*
 * Loop-function arrays, one per gufunc kernel; gufunc_descriptors refers to
 * them through FUNC_ARRAY_NAME(<kernel>).
 *
 * There are problems with eig in complex single precision.
 * That kernel is disabled
 */
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S);
GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A);
GUFUNC_FUNC_ARRAY_EIG(eig);
GUFUNC_FUNC_ARRAY_EIG(eigvals);
static char equal_2_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char equal_3_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
/* second result is logdet, that will always be a REAL */ | |
static char slogdet_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char eigh_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
}; | |
static char eighvals_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char eig_types[] = { | |
NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT, | |
NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char eigvals_types[] = { | |
NPY_FLOAT, NPY_CFLOAT, | |
NPY_DOUBLE, NPY_CDOUBLE, | |
NPY_CDOUBLE, NPY_CDOUBLE | |
}; | |
static char svd_1_1_types[] = { | |
NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_FLOAT, | |
NPY_CDOUBLE, NPY_DOUBLE | |
}; | |
static char svd_1_3_types[] = { | |
NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, | |
NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, | |
NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, | |
NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE | |
}; | |
typedef struct gufunc_descriptor_struct { | |
char *name; | |
char *signature; | |
char *doc; | |
int ntypes; | |
int nin; | |
int nout; | |
PyUFuncGenericFunction *funcs; | |
char *types; | |
} GUFUNC_DESCRIPTOR_t; | |
GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { | |
{ | |
"slogdet", | |
"(m,m)->(),()", | |
"slogdet on the last two dimensions and broadcast on the rest. \n"\ | |
"Results in two arrays, one with sign and the other with log of the"\ | |
" determinants. \n"\ | |
" \"(m,m)->(),()\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(slogdet), | |
slogdet_types | |
}, | |
{ | |
"det", | |
"(m,m)->()", | |
"det of the last two dimensions and broadcast on the rest. \n"\ | |
" \"(m,m)->()\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(det), | |
equal_2_types | |
}, | |
{ | |
"eigh_lo", | |
"(m,m)->(m),(m,m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" lower triangle \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(eighlo), | |
eigh_types | |
}, | |
{ | |
"eigh_up", | |
"(m,m)->(m),(m,m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" upper triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
" eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
4, 1, 2, | |
FUNC_ARRAY_NAME(eighup), | |
eigh_types | |
}, | |
{ | |
"eigvalsh_lo", | |
"(m,m)->(m)", | |
"eigh on the last two dimension and broadcast to the rest, using"\ | |
" lower triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors. \n"\ | |
" \"(m,m)->(m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(eigvalshlo), | |
eighvals_types | |
}, | |
{ | |
"eigvalsh_up", | |
"(m,m)->(m)", | |
"eigvalsh on the last two dimension and broadcast to the rest,"\ | |
" using upper triangle. \n"\ | |
"Results in a vector of eigenvalues and a matrix with the"\ | |
"eigenvectors.\n"\ | |
" \"(m,m)->(m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(eigvalshup), | |
eighvals_types | |
}, | |
{ | |
"solve", | |
"(m,m),(m,n)->(m,n)", | |
"solve the system a x = b, on the last two dimensions, broadcast"\ | |
" to the rest. \n"\ | |
"Results in a matrices with the solutions. \n"\ | |
" \"(m,m),(m,n)->(m,n)\" \n", | |
4, 2, 1, | |
FUNC_ARRAY_NAME(solve), | |
equal_3_types | |
}, | |
{ | |
"solve1", | |
"(m,m),(m)->(m)", | |
"solve the system a x = b, for b being a vector, broadcast in"\ | |
" the outer dimensions. \n"\ | |
"Results in vectors with the solutions. \n"\ | |
" \"(m,m),(m)->(m)\" \n", | |
4,2,1, | |
FUNC_ARRAY_NAME(solve1), | |
equal_3_types | |
}, | |
{ | |
"inv", | |
"(m,m)->(m,m)", | |
"compute the inverse of the last two dimensions and broadcast"\ | |
" to the rest. \n"\ | |
"Results in the inverse matrices. \n"\ | |
" \"(m,m)->(m,m)\" \n", | |
4,1,1, | |
FUNC_ARRAY_NAME(inv), | |
equal_2_types | |
}, | |
{ | |
"cholesky_lo", | |
"(m,m)->(m,m)", | |
"cholesky decomposition of hermitian positive-definite matrices. \n"\ | |
"Broadcast to all outer dimensions. \n"\ | |
" \"(m,m)->(m,m)\" \n", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(cholesky_lo), | |
equal_2_types | |
}, | |
{ | |
"svd_m", | |
"(m,n)->(m)", | |
"svd when n>=m. ", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(svd_N), | |
svd_1_1_types | |
}, | |
{ | |
"svd_n", | |
"(m,n)->(n)", | |
"svd when n<=m", | |
4, 1, 1, | |
FUNC_ARRAY_NAME(svd_N), | |
svd_1_1_types | |
}, | |
{ | |
"svd_m_s", | |
"(m,n)->(m,m),(m),(m,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_S), | |
svd_1_3_types | |
}, | |
{ | |
"svd_n_s", | |
"(m,n)->(m,n),(n),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_S), | |
svd_1_3_types | |
}, | |
{ | |
"svd_m_f", | |
"(m,n)->(m,m),(m),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_A), | |
svd_1_3_types | |
}, | |
{ | |
"svd_n_f", | |
"(m,n)->(m,m),(n),(n,n)", | |
"svd when m>=n", | |
4, 1, 3, | |
FUNC_ARRAY_NAME(svd_A), | |
svd_1_3_types | |
}, | |
{ | |
"eig", | |
"(m,m)->(m),(m,m)", | |
"eig on the last two dimension and broadcast to the rest. \n"\ | |
"Results in a vector with the eigenvalues and a matrix with the"\ | |
" eigenvectors. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
3, 1, 2, | |
FUNC_ARRAY_NAME(eig), | |
eig_types | |
}, | |
{ | |
"eigvals", | |
"(m,m)->(m)", | |
"eigvals on the last two dimension and broadcast to the rest. \n"\ | |
"Results in a vector of eigenvalues. \n"\ | |
" \"(m,m)->(m),(m,m)\" \n", | |
3, 1, 1, | |
FUNC_ARRAY_NAME(eigvals), | |
eigvals_types | |
}, | |
}; | |
static void | |
addUfuncs(PyObject *dictionary) { | |
PyObject *f; | |
int i; | |
const int gufunc_count = sizeof(gufunc_descriptors)/ | |
sizeof(gufunc_descriptors[0]); | |
for (i=0; i < gufunc_count; i++) { | |
GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i]; | |
f = PyUFunc_FromFuncAndDataAndSignature(d->funcs, | |
array_of_nulls, | |
d->types, | |
d->ntypes, | |
d->nin, | |
d->nout, | |
PyUFunc_None, | |
d->name, | |
d->doc, | |
0, | |
d->signature); | |
PyDict_SetItemString(dictionary, d->name, f); | |
dump_ufunc_object((PyUFuncObject*) f); | |
Py_DECREF(f); | |
} | |
} | |
/* -------------------------------------------------------------------------- */ | |
/* Module initialization stuff */ | |
static PyMethodDef UMath_LinAlgMethods[] = { | |
{NULL, NULL, 0, NULL} /* Sentinel */ | |
}; | |
/*
 * Module definition consumed by PyModule_Create.  struct PyModuleDef only
 * exists in the Python 3 C API, so it is compiled for Python 3 only; the
 * Python 2 initialisation path uses Py_InitModule instead.
 */
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    UMATH_LINALG_MODULE_NAME,   /* m_name */
    NULL,                       /* m_doc */
    -1,                         /* m_size: no per-module state */
    UMath_LinAlgMethods,        /* m_methods */
    NULL,
    NULL,
    NULL,
    NULL
};
#endif
PyObject *PyInit__umath_linalg(void) | |
PyMODINIT_FUNC | |
init_umath_linalg(void) | |
{ | |
PyObject *m; | |
PyObject *d; | |
PyObject *version; | |
init_constants(); | |
m = PyModule_Create(&moduledef); | |
m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods); | |
if (m == NULL) | |
return RETVAL; | |
import_array(); | |
import_ufunc(); | |
d = PyModule_GetDict(m); | |
version = PyString_FromString(umath_linalg_version_string); | |
PyDict_SetItemString(d, "__version__", version); | |
Py_DECREF(version); | |
/* Load the ufunc operators into the module's namespace */ | |
addUfuncs(d); | |
if (PyErr_Occurred()) { | |
PyErr_SetString(PyExc_RuntimeError, | |
"cannot load _umath_linalg module."); | |
} | |
return RETVAL; | |
} | |