Update README.md
README.md
@@ -175,20 +175,26 @@ Our code is based on LLaVA-NeXT, before running, please install the LLaVA-NeXT t
 ```shell
 pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
 ```
+**Error Handling**
+
+You might encounter an error when loading the checkpoint from the local disk:
+```shell
+RuntimeError: Error(s) in loading state_dict for CLIPVisionModel:
+size mismatch for vision_model.embeddings.position_embedding.weight: copying a param with shape torch.Size([729, 1152]) from checkpoint, the shape in current model is torch.Size([730, 1152]).
+You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method.
+```
+If you meet this error, you can fix it by following the guidelines in [this issue](https://github.com/inst-it/inst-it/issues/3).
+
 **Load Model**
 ```python
 from llava.model.builder import load_pretrained_model
-from llava.constants import (
-    DEFAULT_IM_END_TOKEN,
-    DEFAULT_IM_START_TOKEN,
-    DEFAULT_IMAGE_TOKEN,
-    IGNORE_INDEX,
-    IMAGE_TOKEN_INDEX,
-)
+from llava.constants import DEFAULT_IMAGE_TOKEN
+
 from llava.mm_utils import (
     KeywordsStoppingCriteria,
     get_model_name_from_path,
-    tokenizer_image_token
+    tokenizer_image_token,
+    process_images
 )
 from llava.conversation import SeparatorStyle, conv_templates
 from llava.eval.model_vqa import preprocess_qwen
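For orientation, here is a minimal sketch of how the imports added in this diff are typically wired together in LLaVA-NeXT to load a checkpoint and run single-image inference. The checkpoint path, image file, question, and conversation template name are placeholders rather than values from this repository, and the sketch follows the generic LLaVA-NeXT flow, not the Inst-IT-specific preprocessing (`preprocess_qwen`) used in the README.

```python
# Minimal LLaVA-NeXT-style inference sketch (assumptions: placeholder checkpoint
# path, placeholder image/question, and the "vicuna_v1" conversation template;
# pick the template that matches your checkpoint).
import torch
from PIL import Image

from llava.model.builder import load_pretrained_model
from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.conversation import conv_templates

model_path = "path/to/checkpoint"  # placeholder
tokenizer, model, image_processor, _ = load_pretrained_model(
    model_path, None, get_model_name_from_path(model_path)
)

# Build a prompt that contains the image placeholder token.
conv = conv_templates["vicuna_v1"].copy()
conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\nDescribe this image.")
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

# Preprocess the image and tokenize the prompt
# (the <image> token is replaced by IMAGE_TOKEN_INDEX).
image = Image.open("example.jpg").convert("RGB")
image_tensor = process_images([image], image_processor, model.config)
image_tensor = [t.to(dtype=torch.float16, device=model.device) for t in image_tensor]
input_ids = (
    tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
    .unsqueeze(0)
    .to(model.device)
)

with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        image_sizes=[image.size],
        do_sample=False,
        max_new_tokens=256,
    )
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip())
```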