| RC_augmentation: false | |
| _dataset_cfg_lookup: | |
| gencode128k_basic: | |
| hf_path: jzshared/gencode128k_basic | |
| path: data/gencode128k_basic | |
| type: refseq | |
| gencode128k_debug: | |
| hf_path: jzshared/gencode128k_debug | |
| path: data/gencode128k_debug | |
| type: refseq | |
| gencode_human_12.8k: | |
| hf_path: jzshared/gencode_human_12.8k | |
| path: data/gencode_human_12.8k | |
| type: refseq | |
| gencode_human_128k: | |
| hf_path: jzshared/gencode_human_128k | |
| path: data/gencode_human_128k | |
| type: refseq | |
| hg38_128k: | |
| hf_path: jzshared/hg38_cds_anchored_128000 | |
| path: data/hg38_cds_anchored_128000 | |
| type: refseq | |
| hg38_12k: | |
| hf_path: jzshared/hg38_12800 | |
| path: data/hg38_cds_anchored_len12800_mincds150_1000000samples | |
| type: refseq | |
| hg38_cds_4m: | |
| hf_path: null | |
| path: data/hg38_cds_dataset_4m_filtered | |
| type: refseq | |
| alias: CKPT_DEBUG | |
| alpha_exp: 1.0 | |
| alpha_max: 0.03 | |
| arch: hnet | |
| batch_size: 32 | |
| bp_per_token: 3 | |
| cluster: mila | |
| cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k | |
| model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=20 | |
| eval_steps=10 save_steps=10 alpha_max=0.03 use_routing_floor=false strictness_max=0 | |
| region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1 alias=CKPT_DEBUG bp_per_token=3 | |
| use_wandb=true upload_to_hf=true hf_repo=jzshared/ckpt_debug | |
| config_path: null | |
| data: gencode_human_12.8k | |
| data_alias: ${.data}_${max_len} | |
| dataset: ${_dataset_cfg_lookup[${data}]} | |
| device: cuda | |
| device_type: GPU | |
| dirs: | |
| data_cache: ${project_root}/data_cache/ | |
| data_storage: ${project_root}/data/ | |
| hydra: ${project_root}/temp/hydra/ | |
| output: ${project_root}/output/${data_alias}/${alias}/ | |
| temp: ${project_root}/temp/working_dir/${uid}/ | |
| wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/} | |
| epochs: 200 | |
| eval_batch_size: ${batch_size} | |
| eval_steps: 10 | |
| grad_acc_steps: 1 | |
| hf_repo: jzshared/ckpt_debug | |
| hf_repo_owner: jzshared | |
| is_distributed: false | |
| local_rank: 0 | |
| logging: | |
| level: info | |
| log_wandb_metric_to_stdout: true | |
| lr: 0.001 | |
| master_port: '41105' | |
| max_data_samples: null | |
| max_eval_samples: 1000 | |
| max_len: 12800 | |
| max_length: ${max_len} | |
| max_train_steps: 20 | |
| min_routing_tokens: 8 | |
| mode: Stage1 | |
| model: | |
| arch: hnet | |
| name: hnet_mamba_64m_2dc | |
| model_alias: ${oc.select:model.name,UnknownModel} | |
| model_cfg: | |
| arch_layout: | |
| - m2 | |
| - - m2 | |
| - - m15 | |
| - m2 | |
| - m2 | |
| attn_cfg: | |
| num_heads: | |
| - 8 | |
| - 8 | |
| - 12 | |
| rotary_emb_dim: | |
| - 16 | |
| - 16 | |
| - 24 | |
| window_size: | |
| - 511 | |
| - 511 | |
| - -1 | |
| d_intermediate: | |
| - 0 | |
| - 0 | |
| - 2048 | |
| d_model: | |
| - 512 | |
| - 512 | |
| - 768 | |
| min_routing_tokens: ${min_routing_tokens} | |
| n_gpt: 1.0 | |
| r_hi: ${r_hi} | |
| r_low: ${r_low} | |
| r_warm_up_end: ${r_warm_up_end} | |
| r_warm_up_start: ${r_warm_up_start} | |
| ssm_cfg: | |
| chunk_size: 256 | |
| d_conv: 4 | |
| d_state: 64 | |
| expand: 2 | |
| head_dim: 64 | |
| tie_embeddings: true | |
| vocab_size: 12 | |
| name: hnet_base | |
| private: false | |
| project_root: ${hydra:runtime.cwd} | |
| r_hi: 0.3 | |
| r_low: 0.0 | |
| r_warm_up_end: 750 | |
| r_warm_up_start: 200 | |
| rank: 0 | |
| reference_loss: null | |
| region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1 | |
| save_steps: 10 | |
| seed: 0 | |
| source: ${dataset.type} | |
| strictness_exp: 1.0 | |
| strictness_max: 0 | |
| tokenizer: fast | |
| training: | |
| adam_beta1: 0.9 | |
| adam_beta2: 0.95 | |
| bf16: true | |
| dataloader_drop_last: true | |
| dataloader_num_workers: 1 | |
| disable_tqdm: false | |
| do_train: true | |
| eval_steps: ${eval_steps} | |
| eval_strategy: steps | |
| gradient_accumulation_steps: ${grad_acc_steps} | |
| gradient_checkpointing: false | |
| group_by_length: false | |
| label_names: | |
| - input_ids | |
| learning_rate: ${lr} | |
| logging_steps: 10 | |
| lr_scheduler_type: linear | |
| max_grad_norm: 2.0 | |
| max_train_steps: ${max_train_steps} | |
| num_train_epochs: ${epochs} | |
| output_dir: ${dirs.output} | |
| overrides: {} | |
| per_device_eval_batch_size: ${eval_batch_size} | |
| per_device_train_batch_size: ${batch_size} | |
| remove_unused_columns: false | |
| report_to: null | |
| save_steps: ${save_steps} | |
| save_strategy: steps | |
| use_lr_multiplier: true | |
| warmup_steps: 500 | |
| weight_decay: 0.1 | |
| training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len} | |
| uid: gnrjc7ou | |
| upload_to_hf: true | |
| use_routing_floor: false | |
| use_wandb: true | |
| valid_test_downsample: null | |
| version: NA | |
| wandb: | |
| dir: ${dirs.wandb_cache} | |
| entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}} | |
| id: gnrjc7ou | |
| mode: online | |
| name: CKPT_DEBUG | |
| project: ${oc.select:env.vars.wandb_proj,DNAFM} | |
| step_metric: null | |
| tags: | |
| - ${mode} | |
| url: https://wandb.ai/jzshared/DNAFM/runs/gnrjc7ou | |
| warmup_steps: 0 | |
| world_size: 1 | |