Spaces:
Sleeping
Sleeping
# Unlike the rest of PyTorch, this file must be Python 2 compliant.
# This script outputs relevant system environment info | |
# Run it with `python collect_env.py` or `python -m torch.utils.collect_env` | |
import datetime | |
import locale | |
import re | |
import subprocess | |
import sys | |
import os | |
from collections import namedtuple | |
# torch is treated as optional: if importing it fails with one of the errors
# below, the script records that fact and carries on without it.
try:
    import torch
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False
else:
    TORCH_AVAILABLE = True
# System Environment Information
#
# Field names of the report record. namedtuple fields are positional, so the
# order below must not change.
_SYSTEM_ENV_FIELDS = (
    'torch_version',
    'is_debug_build',
    'cuda_compiled_version',
    'gcc_version',
    'clang_version',
    'cmake_version',
    'os',
    'libc_version',
    'python_version',
    'python_platform',
    'is_cuda_available',
    'cuda_runtime_version',
    'cuda_module_loading',
    'nvidia_driver_version',
    'nvidia_gpu_models',
    'cudnn_version',
    'pip_version',  # 'pip' or 'pip3'
    'pip_packages',
    'conda_packages',
    'hip_compiled_version',
    'hip_runtime_version',
    'miopen_runtime_version',
    'caching_allocator_config',
    'is_xnnpack_available',
    'cpu_info',
)
SystemEnv = namedtuple('SystemEnv', _SYSTEM_ENV_FIELDS)
# Substrings used by get_conda_packages() to decide which lines of the
# `conda list` output are worth reporting (a line is kept if it contains any
# of them).
DEFAULT_CONDA_PATTERNS = {
    "cudatoolkit",
    "magma",
    "mkl",
    "numpy",
    "optree",
    "soumith",
    "torch",
    "triton",
}
# Substrings used by get_pip_packages() to decide which lines of the
# `pip list` output are worth reporting (a line is kept if it contains any
# of them).
DEFAULT_PIP_PATTERNS = {
    "flake8",
    "mypy",
    "numpy",
    "onnx",
    "optree",
    "torch",
    "triton",
}
def run(command):
    """Execute ``command`` and return ``(return-code, stdout, stderr)``.

    A ``str`` command is run through the shell; anything else is passed to
    ``subprocess.Popen`` without a shell. Both streams are decoded with the
    ``'oem'`` codec on win32 and with the locale's preferred encoding
    elsewhere, then stripped of surrounding whitespace.
    """
    use_shell = type(command) is str
    proc = subprocess.Popen(command, shell=use_shell,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout_bytes, stderr_bytes = proc.communicate()
    encoding = 'oem' if get_platform() == 'win32' else locale.getpreferredencoding()
    stdout_text = stdout_bytes.decode(encoding)
    stderr_text = stderr_bytes.decode(encoding)
    return proc.returncode, stdout_text.strip(), stderr_text.strip()
def run_and_read_all(run_lambda, command):
    """Run ``command`` through ``run_lambda`` and return its whole output.

    ``run_lambda`` must return ``(return-code, stdout, stderr)``. ``None`` is
    returned when the return code is non-zero.
    """
    returncode, stdout, _ = run_lambda(command)
    return stdout if returncode == 0 else None
def run_and_parse_first_match(run_lambda, command, regex):
    """Run ``command`` through ``run_lambda`` and extract group 1 of ``regex``.

    Returns the first capture group of the first match found in the command's
    output, or ``None`` when the return code is non-zero or nothing matches.
    """
    returncode, stdout, _ = run_lambda(command)
    if returncode == 0:
        found = re.search(regex, stdout)
        if found is not None:
            return found.group(1)
    return None
def run_and_return_first_line(run_lambda, command):
    """Run ``command`` through ``run_lambda`` and return the first output line.

    Returns ``None`` when the return code is non-zero; an empty output yields
    an empty string.
    """
    returncode, stdout, _ = run_lambda(command)
    if returncode != 0:
        return None
    first_line, _, _ = stdout.partition('\n')
    return first_line
def get_conda_packages(run_lambda, patterns=None):
    """Return the lines of `conda list` that mention a package of interest.

    The conda executable is taken from the ``CONDA_EXE`` environment variable
    (falling back to ``conda``). Comment lines (starting with ``#``) are
    dropped, and a line is kept when it contains any of ``patterns``
    (``DEFAULT_CONDA_PATTERNS`` when omitted). Returns ``None`` if the command
    fails.
    """
    wanted = DEFAULT_CONDA_PATTERNS if patterns is None else patterns
    conda_exe = os.environ.get('CONDA_EXE', 'conda')
    listing = run_and_read_all(run_lambda, "{} list".format(conda_exe))
    if listing is None:
        return None

    kept = []
    for row in listing.splitlines():
        if row.startswith("#"):
            continue
        if any(name in row for name in wanted):
            kept.append(row)
    return "\n".join(kept)
def get_gcc_version(run_lambda):
    """Return whatever follows ``gcc `` in `gcc --version`, or None if unavailable."""
    version_pattern = r'gcc (.*)'
    return run_and_parse_first_match(run_lambda, 'gcc --version', version_pattern)
def get_clang_version(run_lambda):
    """Return whatever follows ``clang version `` in `clang --version`, or None if unavailable."""
    version_pattern = r'clang version (.*)'
    return run_and_parse_first_match(run_lambda, 'clang --version', version_pattern)
def get_cmake_version(run_lambda):
    """Return whatever follows ``cmake `` in `cmake --version`, or None if unavailable."""
    version_pattern = r'cmake (.*)'
    return run_and_parse_first_match(run_lambda, 'cmake --version', version_pattern)
def get_nvidia_driver_version(run_lambda):
    """Return the NVIDIA driver version string, or None if it cannot be found.

    On darwin the version is parsed from the `kextstat` listing; on every
    other platform it is parsed from the output of nvidia-smi.
    """
    if get_platform() == 'darwin':
        command = 'kextstat | grep -i cuda'
        pattern = r'com[.]nvidia[.]CUDA [(](.*?)[)]'
    else:
        command = get_nvidia_smi()
        pattern = r'Driver Version: (.*?) '
    return run_and_parse_first_match(run_lambda, command, pattern)
def get_gpu_info(run_lambda):
    """Return a description of the installed GPU(s), or None if unavailable.

    On darwin, or when torch is a HIP build, the device name is asked from
    torch itself (for HIP builds the GCN architecture name is appended when
    the device properties expose it). Otherwise the `nvidia-smi -L` listing is
    returned with the per-GPU UUIDs removed.
    """
    # Old torch builds have no ``torch.version.hip`` attribute; treat that the
    # same as "not a HIP build" everywhere in this function instead of only in
    # the outer condition.
    hip_version = getattr(torch.version, 'hip', None) if TORCH_AVAILABLE else None

    if get_platform() == 'darwin' or hip_version is not None:
        if TORCH_AVAILABLE and torch.cuda.is_available():
            if hip_version is not None:
                prop = torch.cuda.get_device_properties(0)
                if hasattr(prop, "gcnArchName"):
                    gcn_arch = " ({})".format(prop.gcnArchName)
                else:
                    gcn_arch = "NoGCNArchNameOnOldPyTorch"
            else:
                gcn_arch = ""
            return torch.cuda.get_device_name(None) + gcn_arch
        return None

    smi = get_nvidia_smi()
    uuid_regex = re.compile(r' \(UUID: .+?\)')
    rc, out, _ = run_lambda(smi + ' -L')
    if rc != 0:
        return None
    # Anonymize GPUs by removing their UUID
    return re.sub(uuid_regex, '', out)
def get_running_cuda_version(run_lambda):
    """Return the version that follows ``V`` on the release line of `nvcc --version`, or None."""
    release_pattern = r'release .+ V(.*)'
    return run_and_parse_first_match(run_lambda, 'nvcc --version', release_pattern)
def get_cudnn_version(run_lambda):
    """Return the cuDNN library file(s) found on this machine.

    It is hard to tell which copy is actually in use, so when several files
    are found they are all listed. Returns a single path, a multi-line
    "Probably one of the following" message, or None when nothing is found.
    """
    platform_name = get_platform()
    if platform_name == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, 'System32', 'where')
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif platform_name == 'darwin':
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'

    rc, out, _ = run_lambda(cudnn_cmd)

    # Return codes 0 and 1 are both accepted (the lookup may return 1 on
    # permission errors or when nothing is found); with no output or any other
    # code, fall back to the CUDNN_LIBRARY environment variable.
    if not out or rc not in (0, 1):
        cudnn_library = os.environ.get('CUDNN_LIBRARY')
        if cudnn_library is not None and os.path.isfile(cudnn_library):
            return os.path.realpath(cudnn_library)
        return None

    # Resolve symbolic links and drop duplicates / non-files. Sorting makes
    # the result deterministic.
    resolved = (os.path.realpath(fn) for fn in out.split('\n'))
    files = sorted({path for path in resolved if os.path.isfile(path)})
    if not files:
        return None
    if len(files) == 1:
        return files[0]
    return 'Probably one of the following:\n{}'.format('\n'.join(files))
def get_nvidia_smi():
    """Return the command used to invoke nvidia-smi.

    Everywhere except win32 this is the bare name ``nvidia-smi``. On win32 the
    System32 location is tried first and the legacy NVSMI folder second; the
    first existing path is returned wrapped in double quotes. If neither
    exists the bare name is returned.
    """
    # Note: nvidia-smi is currently available only on Windows and Linux
    smi = 'nvidia-smi'
    if get_platform() != 'win32':
        return smi

    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
    program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
    candidates = (
        os.path.join(system_root, 'System32', smi),
        os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi),
    )
    for candidate_smi in candidates:
        if os.path.exists(candidate_smi):
            return '"{}"'.format(candidate_smi)
    return smi
# example outputs of CPU infos | |
# * linux | |
# Architecture: x86_64 | |
# CPU op-mode(s): 32-bit, 64-bit | |
# Address sizes: 46 bits physical, 48 bits virtual | |
# Byte Order: Little Endian | |
# CPU(s): 128 | |
# On-line CPU(s) list: 0-127 | |
# Vendor ID: GenuineIntel | |
# Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz | |
# CPU family: 6 | |
# Model: 106 | |
# Thread(s) per core: 2 | |
# Core(s) per socket: 32 | |
# Socket(s): 2 | |
# Stepping: 6 | |
# BogoMIPS: 5799.78 | |
# Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr | |
# sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl | |
# xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 | |
# pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand | |
# hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced | |
# fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap | |
# avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 | |
# xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq | |
# avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities | |
# Virtualization features: | |
# Hypervisor vendor: KVM | |
# Virtualization type: full | |
# Caches (sum of all): | |
# L1d: 3 MiB (64 instances) | |
# L1i: 2 MiB (64 instances) | |
# L2: 80 MiB (64 instances) | |
# L3: 108 MiB (2 instances) | |
# NUMA: | |
# NUMA node(s): 2 | |
# NUMA node0 CPU(s): 0-31,64-95 | |
# NUMA node1 CPU(s): 32-63,96-127 | |
# Vulnerabilities: | |
# Itlb multihit: Not affected | |
# L1tf: Not affected | |
# Mds: Not affected | |
# Meltdown: Not affected | |
# Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown | |
# Retbleed: Not affected | |
# Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp | |
# Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization | |
# Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence | |
# Srbds: Not affected | |
# Tsx async abort: Not affected | |
# * win32 | |
# Architecture=9 | |
# CurrentClockSpeed=2900 | |
# DeviceID=CPU0 | |
# Family=179 | |
# L2CacheSize=40960 | |
# L2CacheSpeed= | |
# Manufacturer=GenuineIntel | |
# MaxClockSpeed=2900 | |
# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz | |
# ProcessorType=3 | |
# Revision=27142 | |
# | |
# Architecture=9 | |
# CurrentClockSpeed=2900 | |
# DeviceID=CPU1 | |
# Family=179 | |
# L2CacheSize=40960 | |
# L2CacheSpeed= | |
# Manufacturer=GenuineIntel | |
# MaxClockSpeed=2900 | |
# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz | |
# ProcessorType=3 | |
# Revision=27142 | |
def get_cpu_info(run_lambda):
    """Return a textual description of the CPU.

    Runs `lscpu` on linux, a `wmic cpu get ...` query on win32 and
    `sysctl -n machdep.cpu.brand_string` on darwin. The command's stdout is
    returned on success and its stderr otherwise. On any other platform no
    command is run and an empty string is returned.
    """
    rc, out, err = 0, '', ''
    platform_name = get_platform()
    if platform_name == 'linux':
        rc, out, err = run_lambda('lscpu')
    elif platform_name == 'win32':
        # Adjacent literals instead of a backslash continuation inside the
        # string, so the command no longer depends on source indentation.
        rc, out, err = run_lambda(
            'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, '
            'CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE'
        )
    elif platform_name == 'darwin':
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
    return out if rc == 0 else err
def get_platform():
    """Return a normalized platform name.

    ``sys.platform`` values starting with ``linux``, ``win32``, ``cygwin`` or
    ``darwin`` are collapsed to exactly that prefix; any other value is
    returned unchanged.
    """
    for known in ('linux', 'win32', 'cygwin', 'darwin'):
        if sys.platform.startswith(known):
            return known
    return sys.platform
def get_mac_version(run_lambda):
    """Return the output of `sw_vers -productVersion` (up to the first newline), or None on failure."""
    command = 'sw_vers -productVersion'
    return run_and_parse_first_match(run_lambda, command, r'(.*)')
def get_windows_version(run_lambda):
    """Return the Windows caption reported by `wmic os get Caption`, or None on failure.

    Both `wmic` and `findstr` are addressed by full path under
    ``%SYSTEMROOT%\\System32``; `findstr /v Caption` removes the header line.
    """
    system32 = os.path.join(os.environ.get('SYSTEMROOT', 'C:\\Windows'), 'System32')
    wmic_cmd = os.path.join(system32, 'Wbem', 'wmic')
    findstr_cmd = os.path.join(system32, 'findstr')
    command = '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)
    return run_and_read_all(run_lambda, command)
def get_lsb_version(run_lambda):
    """Return the ``Description:`` value printed by `lsb_release -a`, or None if unavailable."""
    description_pattern = r'Description:\t(.*)'
    return run_and_parse_first_match(run_lambda, 'lsb_release -a', description_pattern)
def check_release_file(run_lambda):
    """Return the ``PRETTY_NAME`` value found in ``/etc/*-release``, or None if unavailable."""
    pretty_name_pattern = r'PRETTY_NAME="(.*)"'
    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', pretty_name_pattern)
def get_os(run_lambda):
    """Return a human-readable operating system description.

    win32/cygwin: the Windows caption. darwin: ``macOS <version> (<machine>)``
    or None when the version cannot be read. linux: the lsb_release
    description, else the PRETTY_NAME from /etc/*-release, else the bare
    platform name, each followed by ``(<machine>)``. Any other platform: the
    platform name itself.
    """
    from platform import machine

    platform_name = get_platform()

    if platform_name in ('win32', 'cygwin'):
        return get_windows_version(run_lambda)

    if platform_name == 'darwin':
        mac_version = get_mac_version(run_lambda)
        if mac_version is None:
            return None
        return 'macOS {} ({})'.format(mac_version, machine())

    if platform_name == 'linux':
        # Probes are tried in order and only until one succeeds:
        # lsb_release (Ubuntu/Debian based) first, then /etc/*-release.
        for probe in (get_lsb_version, check_release_file):
            description = probe(run_lambda)
            if description is not None:
                return '{} ({})'.format(description, machine())
        return '{} ({})'.format(platform_name, machine())

    # Unknown platform
    return platform_name
def get_python_platform():
    """Return the platform identification string from ``platform.platform()``."""
    from platform import platform as describe_platform
    return describe_platform()
def get_libc_version():
    """Return ``<lib>-<version>`` from ``platform.libc_ver()`` on linux, ``'N/A'`` elsewhere."""
    if get_platform() != 'linux':
        return 'N/A'
    from platform import libc_ver
    return '-'.join(libc_ver())
def get_pip_packages(run_lambda, patterns=None):
    """Return ``(pip_version, packages)`` for the running interpreter.

    ``pip_version`` is the label ``'pip3'`` on Python 3 and ``'pip'``
    otherwise. ``packages`` holds the lines of `pip list --format=freeze` that
    contain any of ``patterns`` (``DEFAULT_PIP_PATTERNS`` when omitted),
    joined by newlines. It is ``None`` when the pip command exits with a
    non-zero return code, matching what get_conda_packages() does on failure.

    Note: will also find conda-installed pytorch and numpy packages.
    """
    if patterns is None:
        patterns = DEFAULT_PIP_PATTERNS

    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'

    # People generally have `pip` as `pip` or `pip3`, but here it is invoked
    # as `python -mpip` using the interpreter that runs this script.
    rc, out, _ = run_lambda([sys.executable, '-mpip', 'list', '--format=freeze'])
    if rc != 0:
        # pip is missing or failed: report "nothing collected" instead of
        # crashing on None.splitlines().
        return pip_version, None

    relevant = "\n".join(
        line
        for line in out.splitlines()
        if any(name in line for name in patterns)
    )
    return pip_version, relevant
def get_cachingallocator_config():
    """Return the value of ``PYTORCH_CUDA_ALLOC_CONF``, or an empty string if it is unset."""
    return os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
def get_cuda_module_loading_config():
    """Return the ``CUDA_MODULE_LOADING`` setting, or ``"N/A"`` without usable CUDA.

    When torch is importable and CUDA is available, CUDA is initialized and
    the environment variable is read afterwards (empty string if unset).
    """
    if not (TORCH_AVAILABLE and torch.cuda.is_available()):
        return "N/A"
    # NOTE(review): the variable is deliberately read after torch.cuda.init();
    # presumably initialization can set it - keep this order.
    torch.cuda.init()
    return os.environ.get('CUDA_MODULE_LOADING', '')
def is_xnnpack_available():
    """Return ``str(torch.backends.xnnpack.enabled)``, or ``"N/A"`` when torch is not importable."""
    if not TORCH_AVAILABLE:
        return "N/A"
    import torch.backends.xnnpack
    return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
def get_env_info():
    """Collect every piece of environment information into a ``SystemEnv``.

    Torch-derived fields are reported as ``'N/A'`` when torch is not
    importable; the HIP/MIOpen fields are ``'N/A'`` unless torch is a HIP
    build, in which case the CUDA compiled version is ``'N/A'`` instead.
    """
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    not_applicable = 'N/A'
    torch_version = debug_build = cuda_available = cuda_compiled = not_applicable
    hip_compiled = hip_runtime = miopen_runtime = not_applicable

    if TORCH_AVAILABLE:
        torch_version = torch.__version__
        debug_build = str(torch.version.debug)
        cuda_available = str(torch.cuda.is_available())
        cuda_compiled = torch.version.cuda

        if getattr(torch.version, 'hip', None) is not None:  # HIP version
            config_lines = torch._C._show_config().split('\n')

            def last_token_or_na(prefix):
                # Last whitespace-separated token of the first config line
                # mentioning `prefix`, or 'N/A' if no line does.
                for entry in config_lines:
                    if prefix in entry:
                        return entry.rsplit(None, 1)[-1]
                return not_applicable

            hip_runtime = last_token_or_na('HIP Runtime')
            miopen_runtime = last_token_or_na('MIOpen')
            cuda_compiled = not_applicable
            hip_compiled = torch.version.hip

    flat_sys_version = sys.version.replace("\n", " ")
    pointer_bits = sys.maxsize.bit_length() + 1
    conda_packages = get_conda_packages(run_lambda)

    # The keyword arguments below are evaluated top to bottom; the collector
    # calls are kept in the same order as before.
    return SystemEnv(
        torch_version=torch_version,
        is_debug_build=debug_build,
        python_version='{} ({}-bit runtime)'.format(flat_sys_version, pointer_bits),
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available,
        cuda_compiled_version=cuda_compiled,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        cuda_module_loading=get_cuda_module_loading_config(),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled,
        hip_runtime_version=hip_runtime,
        miopen_runtime_version=miopen_runtime,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=conda_packages,
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
        cpu_info=get_cpu_info(run_lambda),
    )
# Template of the final report; each placeholder is the name of a SystemEnv
# field and is filled in by pretty_str().
env_info_fmt = "\n".join((
    "PyTorch version: {torch_version}",
    "Is debug build: {is_debug_build}",
    "CUDA used to build PyTorch: {cuda_compiled_version}",
    "ROCM used to build PyTorch: {hip_compiled_version}",
    "OS: {os}",
    "GCC version: {gcc_version}",
    "Clang version: {clang_version}",
    "CMake version: {cmake_version}",
    "Libc version: {libc_version}",
    "Python version: {python_version}",
    "Python platform: {python_platform}",
    "Is CUDA available: {is_cuda_available}",
    "CUDA runtime version: {cuda_runtime_version}",
    "CUDA_MODULE_LOADING set to: {cuda_module_loading}",
    "GPU models and configuration: {nvidia_gpu_models}",
    "Nvidia driver version: {nvidia_driver_version}",
    "cuDNN version: {cudnn_version}",
    "HIP runtime version: {hip_runtime_version}",
    "MIOpen runtime version: {miopen_runtime_version}",
    "Is XNNPACK available: {is_xnnpack_available}",
    "CPU:",
    "{cpu_info}",
    "Versions of relevant libraries:",
    "{pip_packages}",
    "{conda_packages}",
))
def pretty_str(envinfo):
    """Render a ``SystemEnv`` as the human-readable report text.

    ``True``/``False`` become ``Yes``/``No``, missing (``None``) values become
    ``Could not collect``, empty package listings become
    ``No relevant packages``, and every package line is tagged with
    ``[pip]``/``[pip3]`` or ``[conda]``.
    """
    fields = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    gpu_models = envinfo.nvidia_gpu_models
    if gpu_models is not None and len(gpu_models.split('\n')) > 1:
        gpu_models = '\n{}\n'.format(gpu_models)
    fields['nvidia_gpu_models'] = gpu_models

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        'cuda_runtime_version',
        'nvidia_gpu_models',
        'nvidia_driver_version',
    ]
    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
    nothing_detected = all(fields[name] is None for name in dynamic_cuda_fields)
    if TORCH_AVAILABLE and not torch.cuda.is_available() and nothing_detected:
        for name in all_cuda_fields:
            fields[name] = 'No CUDA'
        if envinfo.cuda_compiled_version is None:
            fields['cuda_compiled_version'] = 'None'

    # Replace True with Yes, False with No, and any remaining None with
    # 'Could not collect'.
    for name in list(fields.keys()):
        value = fields[name]
        if value is True:
            fields[name] = 'Yes'
        elif value is False:
            fields[name] = 'No'
        elif value is None:
            fields[name] = 'Could not collect'

    # Package listings: an empty listing becomes 'No relevant packages', then
    # every line gets a prefix. A listing that was None shows up as, e.g.,
    # '[conda] Could not collect'.
    package_tags = (
        ('pip_packages', '[{}] '.format(envinfo.pip_version)),
        ('conda_packages', '[conda] '),
    )
    for name, tag in package_tags:
        listing = fields[name]
        if listing is not None and len(listing) == 0:
            listing = 'No relevant packages'
        if listing:
            listing = '\n'.join(tag + line for line in listing.split('\n'))
        fields[name] = listing

    # The CPU description is always printed exactly as collected.
    fields['cpu_info'] = envinfo.cpu_info
    return env_info_fmt.format(**fields)
def get_pretty_env_info():
    """Collect the environment information and return it as formatted report text."""
    collected = get_env_info()
    return pretty_str(collected)
def main():
    """Print the environment report; on linux also point at the newest minidump.

    The minidump hint is written to stderr, and only when torch exposes
    ``torch.utils._crash_handler`` and its default minidump directory exists
    and contains at least one entry.
    """
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        # get_platform() is what the rest of this file uses to detect linux.
        if get_platform() == 'linux' and os.path.exists(minidump_dir):
            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
            # The directory can exist but be empty; max() would raise
            # ValueError on an empty list, so there is simply nothing to report.
            if dumps:
                latest = max(dumps, key=os.path.getctime)
                ctime = os.path.getctime(latest)
                creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
                msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                      "if this is related to your bug please include it when you file a report ***"
                # sys.stderr.write keeps this file parseable by Python 2,
                # unlike print(..., file=...).
                sys.stderr.write(msg + "\n")


if __name__ == '__main__':
    main()