# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
#
# This code is inspired by HuggingFace's transformers library.
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import torch
import transformers
import yaml
from transformers import HfArgumentParser
from transformers.integrations import is_deepspeed_zero3_enabled
from transformers.trainer_utils import get_last_checkpoint
from transformers.training_args import ParallelMode
from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_available

from ..extras import logging
from ..extras.constants import CHECKPOINT_NAMES
from ..extras.misc import check_dependencies, check_version, get_current_device, is_env_enabled
from .data_args import DataArguments
from .evaluation_args import EvaluationArguments
from .finetuning_args import FinetuningArguments
from .generating_args import GeneratingArguments
from .model_args import ModelArguments
from .training_args import RayArguments, TrainingArguments


logger = logging.get_logger(__name__)

check_dependencies()


_TRAIN_ARGS = [ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_TRAIN_CLS = Tuple[ModelArguments, DataArguments, TrainingArguments, FinetuningArguments, GeneratingArguments]
_INFER_ARGS = [ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_INFER_CLS = Tuple[ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments]
_EVAL_ARGS = [ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]
_EVAL_CLS = Tuple[ModelArguments, DataArguments, EvaluationArguments, FinetuningArguments]


def read_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> Union[Dict[str, Any], List[str]]:
    r"""
    Gets arguments from the command line or a config file.
    """
    if args is not None:
        return args

    if len(sys.argv) == 2 and (sys.argv[1].endswith(".yaml") or sys.argv[1].endswith(".yml")):
        return yaml.safe_load(Path(sys.argv[1]).absolute().read_text())
    elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        return json.loads(Path(sys.argv[1]).absolute().read_text())
    else:
        return sys.argv[1:]


def _parse_args(
    parser: "HfArgumentParser", args: Optional[Union[Dict[str, Any], List[str]]] = None, allow_extra_keys: bool = False
) -> Tuple[Any]:
    args = read_args(args)
    if isinstance(args, dict):
        return parser.parse_dict(args, allow_extra_keys=allow_extra_keys)

    (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(args=args, return_remaining_strings=True)

    if unknown_args and not allow_extra_keys:
        print(parser.format_help())
        print(f"Got unknown args, potentially deprecated arguments: {unknown_args}")
        raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {unknown_args}")

    return tuple(parsed_args)


def _set_transformers_logging() -> None:
    if os.getenv("LLAMAFACTORY_VERBOSITY", "INFO") in ["DEBUG", "INFO"]:
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()


def _verify_model_args(
    model_args: "ModelArguments",
    data_args: "DataArguments",
    finetuning_args: "FinetuningArguments",
) -> None:
    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
        raise ValueError("Adapter is only valid for the LoRA method.")

    if model_args.quantization_bit is not None:
        if finetuning_args.finetuning_type != "lora":
            raise ValueError("Quantization is only compatible with the LoRA method.")

        if finetuning_args.pissa_init:
            raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.")

        if model_args.resize_vocab:
            raise ValueError("Cannot resize embedding layers of a quantized model.")

        if model_args.adapter_name_or_path is not None and finetuning_args.create_new_adapter:
            raise ValueError("Cannot create a new adapter on top of a quantized model.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

    if data_args.template == "yi" and model_args.use_fast_tokenizer:
        logger.warning_rank0("Yi models should use the slow tokenizer. Setting `use_fast_tokenizer` to False.")
        model_args.use_fast_tokenizer = False


def _check_extra_dependencies(
    model_args: "ModelArguments",
    finetuning_args: "FinetuningArguments",
    training_args: Optional["TrainingArguments"] = None,
) -> None:
    if model_args.use_unsloth:
        check_version("unsloth", mandatory=True)

    if model_args.enable_liger_kernel:
        check_version("liger-kernel", mandatory=True)

    if model_args.mixture_of_depths is not None:
        check_version("mixture-of-depth>=1.1.6", mandatory=True)

    if model_args.infer_backend == "vllm":
        check_version("vllm>=0.4.3,<=0.7.2")
        check_version("vllm", mandatory=True)

    if finetuning_args.use_galore:
        check_version("galore_torch", mandatory=True)

    if finetuning_args.use_apollo:
        check_version("apollo_torch", mandatory=True)

    if finetuning_args.use_badam:
        check_version("badam>=1.2.1", mandatory=True)

    if finetuning_args.use_adam_mini:
        check_version("adam-mini", mandatory=True)

    if finetuning_args.plot_loss:
        check_version("matplotlib", mandatory=True)

    if training_args is not None and training_args.predict_with_generate:
        check_version("jieba", mandatory=True)
        check_version("nltk", mandatory=True)
        check_version("rouge_chinese", mandatory=True)


def _parse_train_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _TRAIN_CLS:
    parser = HfArgumentParser(_TRAIN_ARGS)
    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
    return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)


def _parse_infer_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _INFER_CLS:
    parser = HfArgumentParser(_INFER_ARGS)
    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
    return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)


def _parse_eval_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _EVAL_CLS:
    parser = HfArgumentParser(_EVAL_ARGS)
    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
    return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)


def get_ray_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> RayArguments:
    r"""
    Gets the Ray-related arguments, ignoring any extra keys.
    """
    parser = HfArgumentParser(RayArguments)
    (ray_args,) = _parse_args(parser, args, allow_extra_keys=True)
    return ray_args


def get_train_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _TRAIN_CLS:
    r"""
    Parses, validates and post-processes the arguments used for training.
    """
    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)

    # Setup logging
    if training_args.should_log:
        _set_transformers_logging()

    # Check arguments
    if finetuning_args.stage != "sft":
        if training_args.predict_with_generate:
            raise ValueError("`predict_with_generate` cannot be set to True except during SFT.")

        if data_args.neat_packing:
            raise ValueError("`neat_packing` cannot be set to True except during SFT.")

        if data_args.train_on_prompt or data_args.mask_history:
            raise ValueError("`train_on_prompt` or `mask_history` cannot be set to True except during SFT.")

    if finetuning_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate:
        raise ValueError("Please enable `predict_with_generate` to save model predictions.")

    if finetuning_args.stage in ["rm", "ppo"] and training_args.load_best_model_at_end:
        raise ValueError("RM and PPO stages do not support `load_best_model_at_end`.")

    if finetuning_args.stage == "ppo":
        if not training_args.do_train:
            raise ValueError("PPO training does not support evaluation; use the SFT stage to evaluate models.")

        if model_args.shift_attn:
            raise ValueError("PPO training is incompatible with S^2-Attn.")

        if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
            raise ValueError("Unsloth does not support a LoRA reward model.")

        if training_args.report_to and training_args.report_to[0] not in ["wandb", "tensorboard"]:
            raise ValueError("PPO only accepts the wandb or tensorboard logger.")

    if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
        raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")

    if training_args.deepspeed and training_args.parallel_mode != ParallelMode.DISTRIBUTED:
        raise ValueError("Please use `FORCE_TORCHRUN=1` to launch DeepSpeed training.")

    if training_args.max_steps == -1 and data_args.streaming:
        raise ValueError("Please specify `max_steps` in streaming mode.")

    if training_args.do_train and data_args.dataset is None:
        raise ValueError("Please specify dataset for training.")

    if (training_args.do_eval or training_args.do_predict) and (
        data_args.eval_dataset is None and data_args.val_size < 1e-6
    ):
        raise ValueError("Please specify dataset for evaluation.")

    if training_args.predict_with_generate:
        if is_deepspeed_zero3_enabled():
            raise ValueError("`predict_with_generate` is incompatible with DeepSpeed ZeRO-3.")

        if data_args.eval_dataset is None:
            raise ValueError("Cannot use `predict_with_generate` if `eval_dataset` is None.")

        if finetuning_args.compute_accuracy:
            raise ValueError("Cannot use `predict_with_generate` and `compute_accuracy` together.")

    if training_args.do_train and model_args.quantization_device_map == "auto":
        raise ValueError("Cannot use device map for quantized models in training.")

    if finetuning_args.pissa_init and is_deepspeed_zero3_enabled():
        raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA in DeepSpeed ZeRO-3.")

    if finetuning_args.pure_bf16:
        if not (is_torch_bf16_gpu_available() or (is_torch_npu_available() and torch.npu.is_bf16_supported())):
            raise ValueError("This device does not support `pure_bf16`.")

        if is_deepspeed_zero3_enabled():
            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")

    if training_args.parallel_mode == ParallelMode.DISTRIBUTED:
        if finetuning_args.use_galore and finetuning_args.galore_layerwise:
            raise ValueError("Distributed training does not support layer-wise GaLore.")

        if finetuning_args.use_apollo and finetuning_args.apollo_layerwise:
            raise ValueError("Distributed training does not support layer-wise APOLLO.")

        if finetuning_args.use_badam:
            if finetuning_args.badam_mode == "ratio":
                raise ValueError("Ratio-based BAdam does not yet support distributed training; use layer-wise BAdam.")
            elif not is_deepspeed_zero3_enabled():
                raise ValueError("Layer-wise BAdam only supports DeepSpeed ZeRO-3 training.")

    if training_args.deepspeed is not None and (finetuning_args.use_galore or finetuning_args.use_apollo):
        raise ValueError("GaLore and APOLLO are not yet compatible with DeepSpeed.")

    if model_args.infer_backend == "vllm":
        raise ValueError("vLLM backend is only available for API, CLI and Web.")

    if model_args.use_unsloth and is_deepspeed_zero3_enabled():
        raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")

    if data_args.neat_packing and not data_args.packing:
        logger.warning_rank0("`neat_packing` requires `packing` to be True. Changing `packing` to True.")
        data_args.packing = True

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args, training_args)

    if (
        training_args.do_train
        and finetuning_args.finetuning_type == "lora"
        and model_args.quantization_bit is None
        and model_args.resize_vocab
        and finetuning_args.additional_target is None
    ):
        logger.warning_rank0(
            "Remember to add embedding layers to `additional_target` to make the added tokens trainable."
        )

    if training_args.do_train and model_args.quantization_bit is not None and (not model_args.upcast_layernorm):
        logger.warning_rank0("We recommend enabling `upcast_layernorm` in quantized training.")

    if training_args.do_train and (not training_args.fp16) and (not training_args.bf16):
        logger.warning_rank0("We recommend enabling mixed precision training.")

    if (
        training_args.do_train
        and (finetuning_args.use_galore or finetuning_args.use_apollo)
        and not finetuning_args.pure_bf16
    ):
        logger.warning_rank0(
            "Using GaLore or APOLLO with mixed precision training may significantly increase GPU memory usage."
        )

    if (not training_args.do_train) and model_args.quantization_bit is not None:
        logger.warning_rank0("Evaluating the model in 4/8-bit mode may cause lower scores.")

    if (not training_args.do_train) and finetuning_args.stage == "dpo" and finetuning_args.ref_model is None:
        logger.warning_rank0("Specify `ref_model` for computing rewards at evaluation.")

    # Post-process training arguments
    if (
        training_args.parallel_mode == ParallelMode.DISTRIBUTED
        and training_args.ddp_find_unused_parameters is None
        and finetuning_args.finetuning_type == "lora"
    ):
        logger.warning_rank0("`ddp_find_unused_parameters` needs to be set to False for LoRA in DDP training.")
        training_args.ddp_find_unused_parameters = False

    if finetuning_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type in ["full", "freeze"]:
        can_resume_from_checkpoint = False
        if training_args.resume_from_checkpoint is not None:
            logger.warning_rank0("Cannot resume from checkpoint in current stage.")
            training_args.resume_from_checkpoint = None
    else:
        can_resume_from_checkpoint = True

    if (
        training_args.resume_from_checkpoint is None
        and training_args.do_train
        and os.path.isdir(training_args.output_dir)
        and not training_args.overwrite_output_dir
        and can_resume_from_checkpoint
    ):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and any(
            os.path.isfile(os.path.join(training_args.output_dir, name)) for name in CHECKPOINT_NAMES
        ):
            raise ValueError("Output directory already exists and is not empty. Please set `overwrite_output_dir`.")

        if last_checkpoint is not None:
            training_args.resume_from_checkpoint = last_checkpoint
            logger.info_rank0(f"Resuming training from {training_args.resume_from_checkpoint}.")
            logger.info_rank0("Change `output_dir` or use `overwrite_output_dir` to avoid.")

    if (
        finetuning_args.stage in ["rm", "ppo"]
        and finetuning_args.finetuning_type == "lora"
        and training_args.resume_from_checkpoint is not None
    ):
        logger.warning_rank0(
            "Add {} to `adapter_name_or_path` to resume training from checkpoint.".format(
                training_args.resume_from_checkpoint
            )
        )

    # Post-process model arguments
    if training_args.bf16 or finetuning_args.pure_bf16:
        model_args.compute_dtype = torch.bfloat16
    elif training_args.fp16:
        model_args.compute_dtype = torch.float16

    model_args.device_map = {"": get_current_device()}
    model_args.model_max_length = data_args.cutoff_len
    model_args.block_diag_attn = data_args.neat_packing
    data_args.packing = data_args.packing if data_args.packing is not None else finetuning_args.stage == "pt"

    # Log a small summary on each process
    logger.info(
        "Process rank: {}, device: {}, n_gpu: {}, distributed training: {}, compute dtype: {}".format(
            training_args.local_rank,
            training_args.device,
            training_args.n_gpu,
            training_args.parallel_mode == ParallelMode.DISTRIBUTED,
            str(model_args.compute_dtype),
        )
    )

    transformers.set_seed(training_args.seed)

    return model_args, data_args, training_args, finetuning_args, generating_args


def get_infer_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _INFER_CLS:
    r"""
    Parses, validates and post-processes the arguments used for inference.
    """
    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)

    _set_transformers_logging()

    if model_args.infer_backend == "vllm":
        if finetuning_args.stage != "sft":
            raise ValueError("vLLM engine only supports auto-regressive models.")

        if model_args.quantization_bit is not None:
            raise ValueError("vLLM engine does not support bnb quantization (GPTQ and AWQ are supported).")

        if model_args.rope_scaling is not None:
            raise ValueError("vLLM engine does not support RoPE scaling.")

        if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
            raise ValueError("vLLM only accepts a single adapter. Merge them first.")

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    if model_args.export_dir is not None and model_args.export_device == "cpu":
        model_args.device_map = {"": torch.device("cpu")}
        model_args.model_max_length = data_args.cutoff_len
    else:
        model_args.device_map = "auto"

    return model_args, data_args, finetuning_args, generating_args


def get_eval_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _EVAL_CLS:
    r"""
    Parses, validates and post-processes the arguments used for evaluation.
    """
    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)

    _set_transformers_logging()

    if model_args.infer_backend == "vllm":
        raise ValueError("vLLM backend is only available for API, CLI and Web.")

    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args)

    model_args.device_map = "auto"

    transformers.set_seed(eval_args.seed)

    return model_args, data_args, eval_args, finetuning_args
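

# --- Usage sketch ----------------------------------------------------------
# A minimal illustration (not part of this module's behavior) of how the
# parsers above are typically driven: the CLI reads a YAML/JSON config via
# `read_args` and hands the resulting dict to one of the `get_*_args`
# helpers. The import path, model path and template name below are
# illustrative assumptions, not values defined in this file.
#
#     from llamafactory.hparams.parser import get_infer_args
#
#     model_args, data_args, finetuning_args, generating_args = get_infer_args(
#         {
#             "model_name_or_path": "path/to/model",  # placeholder model path
#             "template": "llama3",                   # placeholder template name
#         }
#     )
#
# Passing a dict skips sys.argv entirely; omitting the argument instead makes
# `read_args` fall back to the command line or a single YAML/JSON config path.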