{"model_cache_dir": null, "dataset_cache_dir": null, "data_root": "/data/long-llm", "train_data": ["/scratch/amlt_code/data/pretrain/instruction-tuning/sft-v4.jsonl"], "eval_data": "/scratch/amlt_code/data/pretrain/val-8K-1M/data.json", "model_name_or_path": "../../data/outputs/fastlora.Mistral7BInstructv02.mistral-8K-1B-com.w1024-pre-norm-sum.kinf.ri1024.r128.a64.o.svd.bs8.lr1e-4.pt-mix.20240916-110738/checkpoint-14332", "padding_side": "left", "no_use_fast": false, "access_token": null, "attn_impl": "sdpa", "max_length": 8192, "chat_template": "mistral", "max_position_embeddings": null, "mistral_sliding_window": null, "rope_theta": null, "rope_method": null, "rope_factor": 1.0, "lora": null, "lora_unload": true, "load_in_4_bit": false, "dtype": "bf16", "device_map": null, "batch_size": 1, "cpu": false, "enable_tp": false, "enable_lora": true, "lora_r": 32, "lora_alpha": 64.0, "lora_dropout": 0.1, "lora_param": ["q", "k", "v", "o", "up", "down", "gate"], "enable_fastlora": true, "fastlora_r": 32, "fastlora_inter_size": 32, "fastlora_window": 1024, "fastlora_max_rank": 128, "fastlora_attn_len": 8192, "fastlora_gist_len": 0, "fastlora_alpha": 64.0, "fastlora_dropout": 0.1, "fastlora_param": ["q", "k", "v", "o", "up", "down", "gate"], "fastlora_arch": "aassbb", "fastlora_norm": "default", "fastlora_init": "default", "fastlora_merge": "mean", "fastlora_training_attention_mask": "default", "max_new_tokens": null, "do_sample": null, "temperature": null, "top_p": null, "baseline": null, "longlora_s2_attn": true, "autocompr_segment_length": null}