# model1/llava/train/arguments.py
from dataclasses import dataclass, field
from typing import Optional

import transformers

@dataclass
class ModelArguments:
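    """Arguments selecting the base language model and its multimodal (vision) components."""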
model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
version: Optional[str] = field(default="v0")
freeze_backbone: bool = field(default=False)
tune_mm_mlp_adapter: bool = field(default=False)
vision_tower: Optional[str] = field(default=None)
mm_vision_select_layer: Optional[int] = field(default=-1) # default to the last layer
pretrain_mm_mlp_adapter: Optional[str] = field(default=None)
mm_projector_type: Optional[str] = field(default='linear')
mm_use_start_end: bool = field(default=False)
mm_use_patch_token: bool = field(default=True)
mm_patch_merge_type: Optional[str] = field(default='flat')
mm_vision_select_feature: Optional[str] = field(default="patch")
image_grid_pinpoints: Optional[str] = field(default="[(448, 448)]")
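    # Vision-encoder (ViT) and Q-Former settings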
img_size: int = 224
    drop_path_rate: float = 0.0
vit_precision: Optional[str] = field(default="fp16")
vit_model_path: Optional[str] = field(default=None)
qformer_model_path: Optional[str] = field(default=None)
num_query_token: int = 32
adapter_module_name: Optional[str] = field(default=None)
    adapter_module_path: Optional[str] = field(default=None)


@dataclass
class DataArguments:
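    """Arguments describing the training dataset and image/video preprocessing."""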
    dataset_config: str = field(
        default="",
        metadata={"help": "Training dataset config path"},
    )
# data_path: str = field(default=None,
# metadata={"help": "Path to the training data."})
lazy_preprocess: bool = False
is_multimodal: bool = False
# image_folder: Optional[str] = field(default=None)
image_aspect_ratio: str = 'square'
    num_segments: int = 10
sample_strategy: str = 'fps0.5'
    external_args: Optional[dict] = field(default=None)
    num_token_per_image: Optional[int] = field(default=32)


@dataclass
class TrainingArguments(transformers.TrainingArguments):
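    """transformers.TrainingArguments extended with module-freezing, quantization, and LoRA options."""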
cache_dir: Optional[str] = field(default=None)
optim: str = field(default="adamw_torch")
remove_unused_columns: bool = field(default=False)
freeze_mm_mlp_adapter: bool = field(default=False)
freeze_qformer: bool = field(default=True)
freeze_adapter: bool = field(default=False)
mpt_attn_impl: Optional[str] = field(default="triton")
model_max_length: int = field(
default=512,
metadata={
"help":
"Maximum sequence length. Sequences will be right padded (and possibly truncated)."
},
)
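    # Quantization options for 4-/8-bit (bitsandbytes-style) training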
double_quant: bool = field(
default=True,
metadata={"help": "Compress the quantization statistics through double quantization."}
)
quant_type: str = field(
default="nf4",
metadata={"help": "Quantization data type to use. Should be one of `fp4` or `nf4`."}
)
bits: int = field(
default=16,
metadata={"help": "How many bits to use."}
)
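    # LoRA fine-tuning hyperparameters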
lora_enable: bool = False
lora_r: int = 64
lora_alpha: int = 16
lora_dropout: float = 0.05
lora_weight_path: str = ""
lora_bias: str = "none"
mm_projector_lr: Optional[float] = None
lora_lr: Optional[float] = None
group_by_modality_length: bool = field(default=False)
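

if __name__ == "__main__":
    # Example usage sketch: LLaVA-style trainers typically parse these three
    # dataclasses from the command line via transformers.HfArgumentParser.
    # This block is illustrative and assumes no extra custom parsing logic.
    parser = transformers.HfArgumentParser(
        (ModelArguments, DataArguments, TrainingArguments)
    )
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()
    print(model_args.model_name_or_path, data_args.dataset_config, training_args.output_dir)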