Commit acbbf71 · committed by alessandro trinca tornidor
1 parent: 4d19eb4
[test] update inference function to also return the output mask, useful for tests (now in the saturncloud test.ipynb notebook)
Changed files:
- README.md +34 -1
- notebooks/test.ipynb +0 -0
- requirements_jupyter.txt +4 -0
- tests/__init__.py +0 -0
- tests/imgs/example1_mask_0.png +3 -0
- tests/test_app_helpers.py +88 -0
- utils/app_helpers.py +26 -23
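The substantive change: the inference closure returned by `get_inference_model_by_args()` now returns three values (overlaid image, raw mask, text) instead of two, so tests can compare the mask directly. A hedged usage sketch; the prompt and image path below are hypothetical placeholders, only the signature comes from this commit:

```python
from utils import app_helpers

args = app_helpers.parse_args([])  # defaults, as exercised by the new tests
inference_fn = app_helpers.get_inference_model_by_args(args)

# hypothetical prompt and image path, for illustration only
output_image, output_mask, output_str = inference_fn(
    "where is the dog in this picture?",
    "/path/to/image.jpg",
)
```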
README.md CHANGED
@@ -7,7 +7,40 @@ sdk: docker
 pinned: false
 ---
 
-
+# exec jupyter on the remote server with port forwarding on localhost
+
+1. checkout the repo, install a venv with jupyter
+2. port forwarding on localhost with the private key: `ssh -i ~/.ssh/id_ecdsa_saturncloud [email protected] -L 8889:localhost:8889 -N -f`
+3. start the jupyter-lab server
+4. connect to the page on localhost
+
+## Commands to work on saturncloud after clone and git lfs install
+```bash
+cd ~/workspace/lisa-on-gpu/
+rm -rf lisa_venv
+python3 -m venv lisa_venv
+ln -s lisa_venv/ venv
+source venv/bin/activate
+pip --version
+which python
+python -m pip install pip wheel --upgrade
+python -m pip install pytest pytest-cov jupyterlab
+python -m pip install -r requirements.txt
+nohup jupyter-lab &
+tail -F nohup.out
+```
+
+# Jupyterlab Howto
+
+To run the `test.ipynb` notebook you should already have:
+- cloned the project https://huggingface.co/spaces/aletrn/lisa-on-gpu with git lfs active
+- created and activated a virtualenv
+- installed the jupyterlab dependencies from requirements_jupyter.txt
+- installed the dependencies from requirements.txt
+
+## Hardware requirements
+- an NVIDIA GPU with 10 or 12 GB of memory (a T4 should suffice)
+- at least 16 GB of system RAM
 
 [](http://103.170.5.190:7860/)
 [](https://openxlab.org.cn/apps/detail/openxlab-app/LISA)
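Before launching the notebook you can sanity-check the GPU requirement from the README above; a minimal sketch assuming torch is already installed via requirements.txt (the check itself is not part of the repo):

```python
import torch

# verify an NVIDIA GPU is visible and report its memory
assert torch.cuda.is_available(), "an NVIDIA GPU is required"
props = torch.cuda.get_device_properties(0)
print(f"GPU: {props.name}, memory: {props.total_memory / 1024 ** 3:.1f} GB")
```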
notebooks/test.ipynb ADDED
The diff for this file is too large to render. See the raw diff.
requirements_jupyter.txt ADDED
@@ -0,0 +1,4 @@
+jupyterlab
+ipywidgets
+pytest
+pytest-cov
tests/__init__.py ADDED
(empty file)
tests/imgs/example1_mask_0.png ADDED
(binary image tracked with Git LFS)
tests/test_app_helpers.py ADDED
@@ -0,0 +1,88 @@
+import logging
+import unittest
+
+
+class TestAppBuilders(unittest.TestCase):
+
+    def test_default_creation(self):
+        from utils import utils
+
+        placeholders = utils.create_placeholder_variables()
+        self.assertIsInstance(placeholders, dict)
+        assert placeholders["no_seg_out"].shape == (512, 512, 3)
+        assert placeholders["error_happened"].shape == (512, 512, 3)
+
+    def test_parse_args(self):
+        from utils import app_helpers
+
+        test_args_parse = app_helpers.parse_args([])
+        assert vars(test_args_parse) == {
+            'version': 'xinlai/LISA-13B-llama2-v1-explanatory',
+            'vis_save_path': './vis_output',
+            'precision': 'fp16',
+            'image_size': 1024,
+            'model_max_length': 512,
+            'lora_r': 8,
+            'vision_tower': 'openai/clip-vit-large-patch14',
+            'local_rank': 0,
+            'load_in_8bit': False,
+            'load_in_4bit': True,
+            'use_mm_start_end': True,
+            'conv_type': 'llava_v1'
+        }
+
+    def test_inference(self):
+        import cv2
+        import numpy as np
+        from utils import app_helpers, constants, utils
+
+        max_diff = 0.02
+
+        logging.info("starting...")
+        logging.warning("Remember: free some memory before running 'get_inference_model_by_args(test_args_parse)' again")
+        test_args_parse = app_helpers.parse_args([])
+        inference_fn = app_helpers.get_inference_model_by_args(test_args_parse)
+        idx_example = 0
+        input_prompt, input_image_path = constants.examples[idx_example]
+        logging.info("running inference function with input prompt '{}'.".format(input_prompt))
+        _, output_mask, output_str = inference_fn(
+            input_prompt,
+            utils.ROOT / input_image_path
+        )
+        logging.info(f"output_str: {output_str}.")
+        expected_mask = cv2.imread(
+            str(utils.ROOT / "tests" / "imgs" / f"example{idx_example}_mask_0.png"),
+            cv2.IMREAD_GRAYSCALE
+        )
+
+        tot = output_mask.size
+        count = np.sum(output_mask != expected_mask)
+        perc = 100 * count / tot
+
+        logging.info(f"percentage of differing pixels: {perc:.2f}%.")
+        try:
+            assert np.array_equal(output_mask, expected_mask)
+        except AssertionError:
+            try:
+                logging.error("failed equality assertion!")
+                logging.info(f"assert now that the percentage diff between ndarrays is less than {max_diff}.")
+                assert perc < max_diff
+            except AssertionError as ae:
+                logging.error("failed all assertions, writing debug files...")
+                import datetime
+                now_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+                output_folder = utils.ROOT / "tests" / "imgs"
+                prefix = f"broken_test_example{idx_example + 1}_{now_str}"
+                cv2.imwrite(
+                    str(output_folder / f"{prefix}.png"),
+                    output_mask
+                )
+                with open(output_folder / f"{prefix}__input_prompt.txt",
+                          "w") as dst:
+                    dst.write(input_prompt)
+                with open(output_folder / f"{prefix}__output_str.txt",
+                          "w") as dst:
+                    dst.write(output_str)
+                logging.info(f"Written files with prefix '{prefix}' in the {output_folder} folder.")
+                raise ae
+        logging.info("end")
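The fallback assertion in `test_inference` above tolerates up to `max_diff` percent of differing pixels between the produced and the expected mask. A self-contained toy version of that check, with illustrative values:

```python
import numpy as np

max_diff = 0.02  # maximum tolerated percentage of differing pixels
expected = np.zeros((512, 512), dtype=np.uint8)
produced = expected.copy()
produced[0, :50] = 255  # flip 50 of the 262144 pixels

perc = 100 * np.sum(produced != expected) / produced.size
print(f"{perc:.4f}% of pixels differ")  # ~0.0191%, within the 0.02% budget
assert perc < max_diff
```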
utils/app_helpers.py CHANGED
@@ -17,7 +17,6 @@ from model.llava import conversation as conversation_lib
 from model.llava.mm_utils import tokenizer_image_token
 from model.segment_anything.utils.transforms import ResizeLongestSide
 
-
 placeholders = utils.create_placeholder_variables()
 
 
@@ -96,10 +95,10 @@ def set_image_precision_by_args(input_image, precision):
 
 @session_logger.set_uuid_logging
 def preprocess(
-
-
-
-
+        x,
+        pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
+        pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
+        img_size=1024,
 ) -> torch.Tensor:
     """Normalize pixel values and pad to a square input."""
     logging.info("preprocess started")
@@ -161,7 +160,8 @@ def get_model(args_to_parse):
         }
     )
     _model = LISAForCausalLM.from_pretrained(
-        args_to_parse.version, low_cpu_mem_usage=True, vision_tower=args_to_parse.vision_tower,
+        args_to_parse.version, low_cpu_mem_usage=True, vision_tower=args_to_parse.vision_tower,
+        seg_token_idx=args_to_parse.seg_token_idx, **kwargs
     )
     _model.config.eos_token_id = _tokenizer.eos_token_id
     _model.config.bos_token_id = _tokenizer.bos_token_id
@@ -207,7 +207,6 @@ def get_inference_model_by_args(args_to_parse):
     @session_logger.set_uuid_logging
     def inference(input_str, input_image_pathname):
         ## filter out special chars
-
         input_str = get_cleaned_input(input_str)
         logging.info(f"input_str type: {type(input_str)}, input_image type: {type(input_image_pathname)}.")
         logging.info(f"input_str: {input_str}, input_image: {type(input_image_pathname)}.")
@@ -225,7 +224,7 @@ def get_inference_model_by_args(args_to_parse):
         prompt = utils.DEFAULT_IMAGE_TOKEN + "\n" + prompt
         if args_to_parse.use_mm_start_end:
             replace_token = (
-
+                utils.DEFAULT_IM_START_TOKEN + utils.DEFAULT_IMAGE_TOKEN + utils.DEFAULT_IM_END_TOKEN
             )
             prompt = prompt.replace(utils.DEFAULT_IMAGE_TOKEN, replace_token)
@@ -276,25 +275,28 @@ def get_inference_model_by_args(args_to_parse):
         text_output = text_output.replace("\n", "").replace("  ", " ")
         text_output = text_output.split("ASSISTANT: ")[-1]
 
-        logging.info(
-
+        logging.info(
+            f"found n {len(pred_masks)} prediction masks, "
+            f"text_output type: {type(text_output)}, text_output: {text_output}."
+        )
+        output_image = no_seg_out
+        output_mask = no_seg_out
         for i, pred_mask in enumerate(pred_masks):
-            if pred_mask.shape[0] == 0:
+            if pred_mask.shape[0] == 0 or pred_mask.shape[1] == 0:
                 continue
-
             pred_mask = pred_mask.detach().cpu().numpy()[0]
-
+            pred_mask_bool = pred_mask > 0
+            output_mask = pred_mask_bool.astype(np.uint8) * 255
 
-
-
+            output_image = image_np.copy()
+            output_image[pred_mask_bool] = (
                 image_np * 0.5
-                +
-            )[
+                + pred_mask_bool[:, :, None].astype(np.uint8) * np.array([255, 0, 0]) * 0.5
+            )[pred_mask_bool]
 
-        output_str = f"ASSISTANT: {text_output}"
-        output_image
-
-        return output_image, output_str
+        output_str = f"ASSISTANT: {text_output} ..."
+        logging.info(f"output_image type: {type(output_mask)}.")
+        return output_image, output_mask, output_str
 
     logging.info("prepared inference function!")
     return inference
@@ -303,7 +305,7 @@ def get_inference_model_by_args(args_to_parse):
 @session_logger.set_uuid_logging
 def get_gradio_interface(
         fn_inference: Callable
-
+):
     return gr.Interface(
         fn_inference,
         inputs=[
@@ -311,7 +313,8 @@ def get_gradio_interface(
             gr.Image(type="filepath", label="Input Image")
         ],
         outputs=[
-            gr.Image(type="pil", label="
+            gr.Image(type="pil", label="segmentation Output"),
+            gr.Image(type="pil", label="mask Output"),
             gr.Textbox(lines=1, placeholder=None, label="Text Output")
         ],
         title=constants.title,
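For context, the `preprocess()` defaults introduced in the second hunk are the SAM-style pixel statistics, followed by zero-padding up to a square `img_size`. A minimal standalone sketch of that behavior (an assumed equivalent, not the repo's exact body):

```python
import torch
import torch.nn.functional as F


def preprocess_sketch(
        x: torch.Tensor,  # CHW image tensor with values in [0, 255]
        pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
        pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
        img_size=1024,
) -> torch.Tensor:
    x = (x - pixel_mean) / pixel_std  # normalize pixel values
    h, w = x.shape[-2:]
    return F.pad(x, (0, img_size - w, 0, img_size - h))  # pad right/bottom to a square


print(preprocess_sketch(torch.rand(3, 600, 800) * 255).shape)  # torch.Size([3, 1024, 1024])
```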
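The overlay arithmetic in the last inference hunk blends each masked pixel 50/50 with pure red while leaving unmasked pixels untouched. A toy check of that blend on a hypothetical 2x2 gray image:

```python
import numpy as np

image_np = np.full((2, 2, 3), 200, dtype=np.uint8)          # uniform gray image
pred_mask_bool = np.array([[True, False], [False, False]])  # mask selects one pixel

overlay = (
    image_np * 0.5
    + pred_mask_bool[:, :, None].astype(np.uint8) * np.array([255, 0, 0]) * 0.5
)
output_image = image_np.copy()
output_image[pred_mask_bool] = overlay[pred_mask_bool]
print(output_image[0, 0])  # [227 100 100]: half original gray, half pure red
print(output_image[1, 1])  # [200 200 200]: unmasked pixels are unchanged
```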