|
""" |
|
Wrapper for liblinear |
|
|
|
Author: [email protected] |
|
""" |
|
|
|
import numpy as np |
|
|
|
from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2 |
|
from ..utils._typedefs cimport float32_t, float64_t, int32_t |
|
|
|
include "_liblinear.pxi" |
|
|
|
|
|
def train_wrap(
    object X,
    const float64_t[::1] Y,
    bint is_sparse,
    int solver_type,
    double eps,
    double bias,
    double C,
    const float64_t[:] class_weight,
    int max_iter,
    unsigned random_seed,
    double epsilon,
    const float64_t[::1] sample_weight
): |
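    """Train a liblinear model and return its coefficients.

    Builds liblinear ``problem``/``parameter`` structs from NumPy/SciPy
    buffers, runs the solver, and copies the learned weights back out.
    ``X.data`` is handed to the C helpers as a raw ``char *`` together with
    the ``X_has_type_float64`` flag, so float64 and float32 input are both
    supported without converting to a common dtype on the Python side.

    Parameters
    ----------
    X : ndarray or CSR matrix of shape (n_samples, n_features)
        Training data (float64 or float32).
    Y : ndarray of float64 of shape (n_samples,)
        Encoded target values.
    is_sparse : bool
        Whether ``X`` is a scipy.sparse CSR matrix.
    solver_type : int
        liblinear solver id (4 is the Crammer-Singer multiclass solver).
    eps : double
        Stopping tolerance for the solver.
    bias : double
        If > 0, a constant bias feature with this value is appended to ``X``.
    C : double
        liblinear cost parameter.
    class_weight : ndarray of float64
        Per-class multipliers of ``C`` (may be empty).
    max_iter : int
        Maximum number of iterations.
    random_seed : unsigned int
        Seed for liblinear's internal random number generator.
    epsilon : double
        Epsilon of the epsilon-insensitive loss (regression solvers).
    sample_weight : ndarray of float64 of shape (n_samples,)
        Per-sample weights.

    Returns
    -------
    coef : ndarray of float64, Fortran-ordered
        Shape (1, n_features [+ 1]) for binary problems (except
        Crammer-Singer), otherwise (n_classes, n_features [+ 1]); the extra
        column holds the bias term when ``bias > 0``.
    n_iter : ndarray of int32
        Iterations run per one-vs-rest sub-problem (length 1 for binary
        problems).
    """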
|
    cdef parameter *param
    cdef problem *problem
    cdef model *model
    cdef char_const_ptr error_msg
    cdef int len_w
    cdef bint X_has_type_float64 = X.dtype == np.float64
    cdef char * X_data_bytes_ptr
    cdef const float64_t[::1] X_data_64
    cdef const float32_t[::1] X_data_32
    cdef const int32_t[::1] X_indices
    cdef const int32_t[::1] X_indptr
|
    if is_sparse:
        X_indices = X.indices
        X_indptr = X.indptr
        if X_has_type_float64:
            X_data_64 = X.data
            X_data_bytes_ptr = <char *> &X_data_64[0]
        else:
            X_data_32 = X.data
            X_data_bytes_ptr = <char *> &X_data_32[0]
|
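        # Wrap the CSR buffers (data, indices, indptr), labels and sample
        # weights in a liblinear ``problem`` struct built by the C helper.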
        problem = csr_set_problem(
            X_data_bytes_ptr,
            X_has_type_float64,
            <char *> &X_indices[0],
            <char *> &X_indptr[0],
            (<int32_t>X.shape[0]),
            (<int32_t>X.shape[1]),
            (<int32_t>X.nnz),
            bias,
            <char *> &sample_weight[0],
            <char *> &Y[0]
        )
    else:
        X_as_1d_array = X.reshape(-1)
        if X_has_type_float64:
            X_data_64 = X_as_1d_array
            X_data_bytes_ptr = <char *> &X_data_64[0]
        else:
            X_data_32 = X_as_1d_array
            X_data_bytes_ptr = <char *> &X_data_32[0]
|
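        # Dense input: pass the flattened array together with its number of
        # non-zero entries, which the C helper uses when building liblinear's
        # sparse per-sample representation.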
        problem = set_problem(
            X_data_bytes_ptr,
            X_has_type_float64,
            (<int32_t>X.shape[0]),
            (<int32_t>X.shape[1]),
            (<int32_t>np.count_nonzero(X)),
            bias,
            <char *> &sample_weight[0],
            <char *> &Y[0]
        )
|
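    # liblinear receives the class weights as two parallel arrays: integer
    # labels 0..n_classes-1 and the corresponding weights; NULL is passed for
    # both when no custom weights are given.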
    cdef int32_t[::1] class_weight_label = np.arange(class_weight.shape[0], dtype=np.intc)
    param = set_parameter(
        solver_type,
        eps,
        C,
        class_weight.shape[0],
        <char *> &class_weight_label[0] if class_weight_label.size > 0 else NULL,
        <char *> &class_weight[0] if class_weight.size > 0 else NULL,
        max_iter,
        random_seed,
        epsilon
    )
|
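    # Let liblinear validate the solver/parameter combination; clean up the
    # C structs before raising so nothing is leaked.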
    error_msg = check_parameter(problem, param)
    if error_msg:
        free_problem(problem)
        free_parameter(param)
        raise ValueError(error_msg)
|
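    # Hand liblinear scikit-learn's double-precision BLAS kernels through a
    # struct of function pointers, so the C solver uses the same dot/axpy/
    # scal/nrm2 routines as the rest of the code base.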
    cdef BlasFunctions blas_functions
    blas_functions.dot = _dot[double]
    blas_functions.axpy = _axpy[double]
    blas_functions.scal = _scal[double]
    blas_functions.nrm2 = _nrm2[double]
|
    # Run the solver with the GIL released; train() is pure C and does not
    # touch any Python objects.
    with nogil:
        model = train(problem, param, &blas_functions)
|
    # Free the C problem and parameter structs now that training is done.
    free_problem(problem)
    free_parameter(param)
    # Do not call destroy_param(param) here: it would also free
    # class_weight_label and class_weight, whose memory is owned by NumPy.
|
    # Allocate the coefficient matrix in Fortran (column-major) order, the
    # memory layout liblinear itself uses.
    cdef float64_t[::1, :] w
    cdef int nr_class = get_nr_class(model)
|
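    # liblinear reports one iteration count per one-vs-rest sub-problem; a
    # binary problem is solved as a single sub-problem, so only one count is
    # returned.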
    cdef int labels_ = nr_class
    if nr_class == 2:
        labels_ = 1
    cdef int32_t[::1] n_iter = np.zeros(labels_, dtype=np.intc)
    get_n_iter(model, <int *> &n_iter[0])
|
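    # When a bias term is used, liblinear appends it as an extra feature, so
    # the copied weight matrix has one more column than X.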
    cdef int nr_feature = get_nr_feature(model)
    if bias > 0:
        nr_feature = nr_feature + 1
|
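    # A binary problem (except with the Crammer-Singer solver) has a single
    # weight vector; otherwise liblinear stores one weight vector per class.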
    if nr_class == 2 and solver_type != 4:  # solver is not Crammer-Singer
        w = np.empty((1, nr_feature), order='F')
        copy_w(&w[0, 0], model, nr_feature)
    else:
        len_w = (nr_class) * nr_feature
        w = np.empty((nr_class, nr_feature), order='F')
        copy_w(&w[0, 0], model, len_w)
|
    free_and_destroy_model(&model)

    return w.base, n_iter.base
|
|
def set_verbosity_wrap(int verbosity):
    """
    Control verbosity of the liblinear library.
    """
    set_verbosity(verbosity)