Spaces:

aletrn
/

lisa-on-cuda

Paused

alessandro trinca tornidor committed on Feb 27, 2024

Commit

37a5f04

1 Parent(s): e789db0

[refactor] set image precision with an external function

Files changed (1) hide show

app.py CHANGED Viewed

@@ -100,6 +100,16 @@ def parse_args(args_to_parse):
     return parser.parse_args(args_to_parse)
 def preprocess(
     x,
     pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
@@ -267,12 +277,7 @@ def get_inference_model_by_args(args_to_parse):
             .cuda()
         )
         logging.info(f"image_clip type: {type(image_clip)}.")
-        if args_to_parse.precision == "bf16":
-            image_clip = image_clip.bfloat16()
-        elif args_to_parse.precision == "fp16":
-            image_clip = image_clip.half()
-        else:
-            image_clip = image_clip.float()
         image = transform.apply_image(image_np)
         resize_list = [image.shape[:2]]
@@ -283,12 +288,7 @@ def get_inference_model_by_args(args_to_parse):
             .cuda()
         )
         logging.info(f"image_clip type: {type(image_clip)}.")
-        if args_to_parse.precision == "bf16":
-            image = image.bfloat16()
-        elif args_to_parse.precision == "fp16":
-            image = image.half()
-        else:
-            image = image.float()
         input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
         input_ids = input_ids.unsqueeze(0).cuda()
@@ -330,6 +330,7 @@ def get_inference_model_by_args(args_to_parse):
             ## no seg output
             output_image = cv2.imread("./resources/no_seg_out.png")[:, :, ::-1]
         return output_image, output_str
     return inference

     return parser.parse_args(args_to_parse)
+def set_image_precision_by_args(input_image, precision):  # Convert input_image per the precision string and return it (input_image is presumably a torch tensor — confirm).
+    if precision == "bf16":  # "bf16" selects input_image.bfloat16()
+        input_image = input_image.bfloat16()
+    elif precision == "fp16":  # "fp16" selects input_image.half()
+        input_image = input_image.half()
+    else:  # any other precision value falls back to input_image.float()
+        input_image = input_image.float()
+    return input_image  # the visible call sites rebind their variable to this return value
 def preprocess(
     x,
     pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
             .cuda()
         )
         logging.info(f"image_clip type: {type(image_clip)}.")
+        image_clip = set_image_precision_by_args(image_clip, args_to_parse.precision)
         image = transform.apply_image(image_np)
         resize_list = [image.shape[:2]]
             .cuda()
         )
         logging.info(f"image_clip type: {type(image_clip)}.")
+        image = set_image_precision_by_args(image, args_to_parse.precision)
         input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
         input_ids = input_ids.unsqueeze(0).cuda()
             ## no seg output
             output_image = cv2.imread("./resources/no_seg_out.png")[:, :, ::-1]
         return output_image, output_str
     return inference