{
  "_name_or_path": "logs/checkpoint",
  "action_downsample": 1,
  "action_fps": 15,
  "action_option": "dense",
  "architectures": [
    "DVGFormerModel"
  ],
  "backbone_downsample": 14,
  "cropped_sensor_width": 36.0,
  "drone_types": [
    0,
    1
  ],
  "fix_image_width": true,
  "focal_alpha": 0.9,
  "fps": 3,
  "fps_downsample": 5,
  "gpt2_config": {
    "action_downsample": 1,
    "action_fps": 15,
    "architectures": [
      "UAVPoseNetModel"
    ],
    "backbone_downsample": 14,
    "cropped_sensor_width": 36.0,
    "fps_downsample": 5,
    "hdf5_fname": "dataset_full.h5",
    "image_resolution": [
      168,
      294
    ],
    "model_type": "gpt2",
    "n_action_to_predict": 5,
    "n_embd": 384,
    "n_head": 6,
    "n_positions": 1562,
    "n_token_image": 45,
    "n_token_to_predict": 5,
    "n_token_total": 52,
    "per_token_preds": 1,
    "root": "youtube_drone_videos",
    "torch_dtype": "bfloat16",
    "vision_feat_dim": 384
  },
  "hdf5_fname": "dataset_full.h5",
  "hidden_size": 384,
  "ignore_value": -100,
  "image_featmap_shape": [
    5,
    9
  ],
  "image_resolution": [
    168,
    294
  ],
  "loss_coef_action": 1,
  "loss_coef_drone_type": 0,
  "loss_coef_future": 0,
  "loss_coef_state": 0,
  "loss_coef_stop": 0,
  "max_model_frames": 150,
  "model_type": "dvgformer",
  "motion_option": "local",
  "n_action_to_predict": 5,
  "n_future_frames": 15,
  "n_token_action": 1,
  "n_token_boa": 1,
  "n_token_drone_type": 1,
  "n_token_frame": 52,
  "n_token_image": 45,
  "n_token_noise": 1,
  "n_token_predict": 5,
  "n_token_prepend": 2,
  "n_token_quality": 0,
  "n_token_state": 1,
  "n_token_to_predict": 5,
  "n_token_total": 52,
  "num_quantile_bins": 10,
  "pad_side": "right",
  "pad_token_value": 0,
  "per_token_preds": 1,
  "prediction_option": "iterative",
  "root": "youtube_drone_videos",
  "test_gt_forcing": "allframe",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2",
  "use_depth": true,
  "use_quality_mlps": false,
  "vision_backbone": "dinov2_vits14_reg",
  "vision_feat_dim": 384
}