|
import transformers |
|
|
|
from typing import Dict, Optional, Sequence, List |
|
from dataclasses import dataclass, field |
|
|
|
@dataclass
class ModelArguments:
    """Container for model-related options; every field has a default.

    The field order below defines the positional order of the generated
    ``__init__``, so it must not be rearranged.
    """

    # Base model identifier and general switches.
    model_name_or_path: Optional[str] = "facebook/opt-125m"
    version: Optional[str] = "v0"
    freeze_backbone: bool = False
    tune_mm_mlp_adapter: bool = False

    # Vision tower / "mm_*" options.
    # NOTE(review): presumably consumed by the multimodal model builder,
    # which is not visible in this file -- confirm semantics there.
    vision_tower: Optional[str] = None
    mm_vision_select_layer: Optional[int] = -1
    pretrain_mm_mlp_adapter: Optional[str] = None
    mm_projector_type: Optional[str] = "linear"
    mm_use_start_end: bool = False
    mm_use_patch_token: bool = True
    mm_patch_merge_type: Optional[str] = "flat"
    mm_vision_select_feature: Optional[str] = "patch"
    # Stored as a string, not a parsed list of tuples.
    image_grid_pinpoints: Optional[str] = "[(448, 448)]"

    # NOTE(review): names suggest ViT / Q-Former settings -- confirm with the
    # code that reads them.
    img_size: int = 224
    drop_path_rate: float = 0.0
    vit_precision: Optional[str] = "fp16"
    vit_model_path: Optional[str] = None
    qformer_model_path: Optional[str] = None
    num_query_token: int = 32

    # Optional adapter module name and path; both unset by default.
    adapter_module_name: Optional[str] = None
    adapter_module_path: Optional[str] = None
|
|
|
@dataclass
class DataArguments:
    """Container for dataset and preprocessing options; all fields default.

    The field order below defines the positional order of the generated
    ``__init__``, so it must not be rearranged.
    """

    dataset_config: str = field(
        default="",
        metadata={"help": "Training dataset config path"},
    )

    lazy_preprocess: bool = False
    is_multimodal: bool = False

    image_aspect_ratio: str = "square"

    # NOTE(review): names suggest video frame sampling settings -- confirm
    # against the dataset code that reads them.
    num_segments: int = 10
    sample_strategy: str = "fps0.5"
    # Defaults to None, so the annotation must be Optional. The inner type
    # stays the builtin ``dict`` so parsers that unwrap Optional see the
    # same type as before.
    external_args: Optional[dict] = None
    num_token_per_image: Optional[int] = 32
|
|
|
|
|
@dataclass
class TrainingArguments(transformers.TrainingArguments):
    """``transformers.TrainingArguments`` extended with project-specific fields.

    Fields declared here either add new options or re-declare an inherited
    one with a different default.
    """

    cache_dir: Optional[str] = None
    # NOTE(review): ``optim`` and ``remove_unused_columns`` are presumably
    # re-declarations of parent fields with new defaults -- confirm against
    # the installed transformers version.
    optim: str = "adamw_torch"
    remove_unused_columns: bool = False

    # Freeze switches; only the Q-Former one is on by default.
    freeze_mm_mlp_adapter: bool = False
    freeze_qformer: bool = True
    freeze_adapter: bool = False

    mpt_attn_impl: Optional[str] = "triton"
    model_max_length: int = field(
        default=512,
        metadata={
            "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )

    # Quantization options.
    double_quant: bool = field(
        default=True,
        metadata={
            "help": "Compress the quantization statistics through double quantization."
        },
    )
    quant_type: str = field(
        default="nf4",
        metadata={
            "help": "Quantization data type to use. Should be one of `fp4` or `nf4`."
        },
    )
    bits: int = field(
        default=16,
        metadata={"help": "How many bits to use."},
    )

    # LoRA options.
    lora_enable: bool = False
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    lora_weight_path: str = ""
    lora_bias: str = "none"

    # Optional learning-rate overrides; None leaves them unset.
    mm_projector_lr: Optional[float] = None
    lora_lr: Optional[float] = None

    group_by_modality_length: bool = False