| from transformers import PretrainedConfig, LlamaConfig, SiglipVisionConfig | |
class LlamavisionConfig(PretrainedConfig):
    """Composite configuration holding a text sub-config and a vision sub-config.

    The text side is a ``LlamaConfig`` stored on ``self.text_config``; the
    vision side is a ``SiglipVisionConfig`` stored on ``self.vision_config``.

    Keyword Args:
        text_config: ``None`` or omitted (build a default ``LlamaConfig``), a
            dict of ``LlamaConfig`` keyword arguments, or an existing
            ``LlamaConfig`` instance (used as-is).
        vision_config: ``None`` or omitted (build a default
            ``SiglipVisionConfig``), a dict of ``SiglipVisionConfig`` keyword
            arguments, or an existing ``SiglipVisionConfig`` instance (used
            as-is).
        **kwargs: every remaining keyword argument is forwarded unchanged to
            ``PretrainedConfig.__init__``.
    """

    model_type = "llamavision"

    def __init__(self, **kwargs):
        # Pop the sub-config entries before calling the base initializer so
        # they are not forwarded to it as ordinary keyword arguments.
        self.text_config = self._build_sub_config(
            LlamaConfig, kwargs.pop("text_config", None)
        )
        self.vision_config = self._build_sub_config(
            SiglipVisionConfig, kwargs.pop("vision_config", None)
        )
        super().__init__(**kwargs)

    @staticmethod
    def _build_sub_config(config_cls, value):
        """Return a ``config_cls`` instance built from ``None``, a dict, or an instance."""
        if value is None:
            # Explicit None is treated like a missing key: all defaults.
            return config_cls()
        if isinstance(value, config_cls):
            # Already constructed; unpacking it with ** would raise TypeError.
            return value
        # Otherwise expect a mapping of constructor keyword arguments.
        return config_cls(**value)