# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Aggregator module: imports names from the sibling submodules so they are
# reachable from this package's namespace. It contains no definitions of its
# own — only re-imports (plus one optional block guarded on DeepSpeed being
# installed).
from .constants import (
    MODEL_NAME,
    OPTIMIZER_NAME,
    RNG_STATE_NAME,
    SAFE_MODEL_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    SAMPLER_NAME,
    SCALER_NAME,
    SCHEDULER_NAME,
    TORCH_DISTRIBUTED_OPERATION_TYPES,
    TORCH_LAUNCH_PARAMS,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
)
from .dataclasses import (
    AutocastKwargs,
    BnbQuantizationConfig,
    ComputeEnvironment,
    CustomDtype,
    DataLoaderConfiguration,
    DeepSpeedPlugin,
    DistributedDataParallelKwargs,
    DistributedType,
    DynamoBackend,
    FP8RecipeKwargs,
    FullyShardedDataParallelPlugin,
    GradientAccumulationPlugin,
    GradScalerKwargs,
    InitProcessGroupKwargs,
    KwargsHandler,
    LoggerType,
    MegatronLMPlugin,
    PrecisionType,
    ProjectConfiguration,
    RNGType,
    SageMakerDistributedType,
    TensorInformation,
    TorchDynamoPlugin,
)
from .environment import (
    are_libraries_initialized,
    check_cuda_p2p_ib_support,
    check_fp8_capability,
    convert_dict_to_env_variables,
    get_cpu_distributed_information,
    get_gpu_info,
    get_int_from_env,
    parse_choice_from_env,
    parse_flag_from_env,
    set_numa_affinity,
    str_to_bool,
)
from .imports import (
    get_ccl_version,
    is_4bit_bnb_available,
    is_8bit_bnb_available,
    is_aim_available,
    is_bf16_available,
    is_bnb_available,
    is_boto3_available,
    is_ccl_available,
    is_clearml_available,
    is_comet_ml_available,
    is_cuda_available,
    is_datasets_available,
    is_deepspeed_available,
    is_dvclive_available,
    is_fp8_available,
    is_ipex_available,
    is_megatron_lm_available,
    is_mlflow_available,
    is_mlu_available,
    is_mps_available,
    is_msamp_available,
    is_npu_available,
    is_pandas_available,
    is_peft_available,
    is_pippy_available,
    is_pynvml_available,
    is_rich_available,
    is_sagemaker_available,
    is_tensorboard_available,
    is_timm_available,
    is_torch_xla_available,
    is_transformer_engine_available,
    is_transformers_available,
    is_wandb_available,
    is_xpu_available,
)
from .modeling import (
    calculate_maximum_sizes,
    check_device_map,
    check_tied_parameters_in_config,
    check_tied_parameters_on_same_device,
    compute_module_sizes,
    convert_file_size_to_int,
    dtype_byte_size,
    find_tied_parameters,
    get_balanced_memory,
    get_max_layer_size,
    get_max_memory,
    get_mixed_precision_context_manager,
    id_tensor_storage,
    infer_auto_device_map,
    is_peft_model,
    load_checkpoint_in_model,
    load_offloaded_weights,
    load_state_dict,
    named_module_tensors,
    retie_parameters,
    set_module_tensor_to_device,
    shard_checkpoint,
)
from .offload import (
    OffloadedWeightsLoader,
    PrefixedDataset,
    extract_submodules_state_dict,
    load_offloaded_weight,
    offload_state_dict,
    offload_weight,
    save_offload_index,
)
from .operations import (
    CannotPadNestedTensorWarning,
    broadcast,
    broadcast_object_list,
    concatenate,
    convert_outputs_to_fp32,
    convert_to_fp32,
    copy_tensor_to_devices,
    find_batch_size,
    find_device,
    gather,
    gather_object,
    get_data_structure,
    honor_type,
    ignorant_find_batch_size,
    initialize_tensors,
    is_namedtuple,
    is_tensor_information,
    is_torch_tensor,
    listify,
    pad_across_processes,
    pad_input_tensors,
    recursively_apply,
    reduce,
    send_to_device,
    slice_tensors,
)
from .versions import compare_versions, is_torch_version

# DeepSpeed wrappers are only importable when the `deepspeed` package is
# installed, so they are guarded rather than imported unconditionally.
if is_deepspeed_available():
    from .deepspeed import (
        DeepSpeedEngineWrapper,
        DeepSpeedOptimizerWrapper,
        DeepSpeedSchedulerWrapper,
        DummyOptim,
        DummyScheduler,
        HfDeepSpeedConfig,
    )

from .bnb import has_4bit_bnb_layers, load_and_quantize_model
from .fsdp_utils import load_fsdp_model, load_fsdp_optimizer, save_fsdp_model, save_fsdp_optimizer
from .launch import (
    PrepareForLaunch,
    _filter_args,
    prepare_deepspeed_cmd_env,
    prepare_multi_gpu_env,
    # NOTE(review): "sagemager" looks like a typo for "sagemaker", but this is
    # the identifier actually defined in `.launch`; renaming it here would
    # break the import. Fix it at the definition site (with an alias kept for
    # backward compatibility) if ever cleaned up.
    prepare_sagemager_args_inputs,
    prepare_simple_launcher_cmd_env,
    prepare_tpu,
)
from .megatron_lm import (
    AbstractTrainStep,
    BertTrainStep,
    GPTTrainStep,
    MegatronEngine,
    MegatronLMDummyDataLoader,
    MegatronLMDummyScheduler,
    MegatronLMOptimizerWrapper,
    MegatronLMSchedulerWrapper,
    T5TrainStep,
    avg_losses_across_data_parallel_group,
    gather_across_data_parallel_groups,
)
# `.megatron_lm` exposes generic names (`initialize`, `prepare_model`, ...);
# they are aliased with a `megatron_lm_` prefix to avoid clobbering similarly
# named helpers in this namespace.
from .megatron_lm import initialize as megatron_lm_initialize
from .megatron_lm import prepare_data_loader as megatron_lm_prepare_data_loader
from .megatron_lm import prepare_model as megatron_lm_prepare_model
from .megatron_lm import prepare_optimizer as megatron_lm_prepare_optimizer
from .megatron_lm import prepare_scheduler as megatron_lm_prepare_scheduler
from .memory import find_executable_batch_size, release_memory
from .other import (
    check_os_kernel,
    clean_state_dict_for_safetensors,
    clear_environment,
    convert_bytes,
    extract_model_from_parallel,
    get_pretty_name,
    is_port_in_use,
    merge_dicts,
    patch_environment,
    recursive_getattr,
    save,
    wait_for_everyone,
    write_basic_config,
)
from .random import set_seed, synchronize_rng_state, synchronize_rng_states
from .torch_xla import install_xla
from .tqdm import tqdm
from .transformer_engine import convert_model, has_transformer_engine_layers