spam-classifier / venv /lib /python3.11 /site-packages /sklearn /utils /_openmp_helpers.pyx

Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

3.14 kB

	import os
	from joblib import cpu_count


	# Module-level cache for the result of joblib's cpu_count, as we do not
	# expect this to change during the lifecycle of a Python program. The
	# dictionary is keyed by the only_physical_cores flag and is filled lazily,
	# on first use, by _openmp_effective_n_threads.
	_CPU_COUNTS = {}


	def _openmp_parallelism_enabled():
	    """Report whether scikit-learn was built with OpenMP support.

	    This makes a piece of information gathered at compile time available
	    at runtime.
	    """
	    # The flag below is declared as a boolean in _openmp_helpers.pxd and its
	    # value is fixed at compile time; returning it makes it reachable from
	    # Python code.
	    openmp_enabled = SKLEARN_OPENMP_PARALLELISM_ENABLED
	    return openmp_enabled


	cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True):
	    """Compute the number of threads OpenMP calls should effectively use.

	    The result depends on the value of ``n_threads``:

	    - ``n_threads = None``:

	      - when the ``OMP_NUM_THREADS`` environment variable is set, the value
	        of ``openmp.omp_get_max_threads()`` is returned;
	      - otherwise, the result is the smaller of
	        ``openmp.omp_get_max_threads()`` and the number of cpus, with
	        cgroups quotas (typically set by tools such as Docker) taken into
	        account.

	      Note that ``omp_get_max_threads`` is itself influenced by the
	      ``OMP_NUM_THREADS`` environment variable and, at runtime, by
	      ``omp_set_num_threads``.

	    - ``n_threads > 0``: the value is returned as is and serves as the
	      maximal number of threads for parallel OpenMP calls.

	    - ``n_threads < 0``: the maximal number of threads minus
	      ``|n_threads + 1|`` is returned, but never less than 1. In
	      particular, ``n_threads = -1`` uses as many threads as there are
	      available cores on the machine.

	    - ``n_threads = 0``: a ValueError is raised.

	    Setting ``only_physical_cores=False`` allows extra threads to be used
	    for SMT/HyperThreading logical cores. Empirically, using as many threads
	    as there are SMT cores slightly improves performance in some cases but
	    severely degrades it in others. Keeping ``only_physical_cores=True`` is
	    therefore recommended unless an empirical study of the impact of SMT has
	    been conducted on a case-by-case basis (with various input data shapes,
	    small ones in particular).

	    When scikit-learn is built without OpenMP support, 1 is always returned.
	    """
	    if n_threads == 0:
	        raise ValueError("n_threads = 0 is invalid")

	    # Without OpenMP at build time, everything runs sequentially.
	    if not SKLEARN_OPENMP_PARALLELISM_ENABLED:
	        return 1

	    if os.getenv("OMP_NUM_THREADS"):
	        # An explicit user setting takes precedence, even when it exceeds
	        # the number of cpus.
	        thread_limit = omp_get_max_threads()
	    else:
	        # Query the cpu count only once per value of only_physical_cores and
	        # reuse the cached answer afterwards.
	        if only_physical_cores not in _CPU_COUNTS:
	            _CPU_COUNTS[only_physical_cores] = cpu_count(
	                only_physical_cores=only_physical_cores
	            )
	        thread_limit = min(
	            omp_get_max_threads(), _CPU_COUNTS[only_physical_cores]
	        )

	    if n_threads is None:
	        return thread_limit
	    if n_threads < 0:
	        # Count backwards from the limit while keeping at least one thread.
	        return max(1, thread_limit + n_threads + 1)
	    return n_threads