# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union

import mmcv
import numpy as np
import torch
import torch.nn.functional as F
from mmengine.dataset import BaseDataset
from mmengine.dist import master_only
from mmengine.visualization import Visualizer
from mmengine.visualization.utils import img_from_canvas

from mmpretrain.registry import VISUALIZERS
from mmpretrain.structures import DataSample
from .utils import create_figure, get_adaptive_scale


@VISUALIZERS.register_module()
class UniversalVisualizer(Visualizer):
    """Universal Visualizer for multiple tasks.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): The original image to draw. The format
            should be RGB. Defaults to None.
        vis_backends (list, optional): Visual backend config list.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            If it is None, the backend storage will not save any data.
        fig_save_cfg (dict): Keyword parameters of figure for saving.
            Defaults to empty dict.
        fig_show_cfg (dict): Keyword parameters of figure for showing.
            Defaults to empty dict.
    """
    DEFAULT_TEXT_CFG = {
        'family': 'monospace',
        'color': 'white',
        'bbox': dict(facecolor='black', alpha=0.5, boxstyle='Round'),
        'verticalalignment': 'top',
        'horizontalalignment': 'left',
    }

    @master_only
    def visualize_cls(self,
                      image: np.ndarray,
                      data_sample: DataSample,
                      classes: Optional[Sequence[str]] = None,
                      draw_gt: bool = True,
                      draw_pred: bool = True,
                      draw_score: bool = True,
                      resize: Optional[int] = None,
                      rescale_factor: Optional[float] = None,
                      text_cfg: dict = dict(),
                      show: bool = False,
                      wait_time: float = 0,
                      out_file: Optional[str] = None,
                      name: str = '',
                      step: int = 0) -> np.ndarray:
        """Visualize image classification result.

        This method will draw a text box on the input image to visualize the
        information about image classification, like the ground-truth label
        and prediction label.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            classes (Sequence[str], optional): The categories names.
                Defaults to None.
            draw_gt (bool): Whether to draw ground-truth labels.
                Defaults to True.
            draw_pred (bool): Whether to draw prediction labels.
                Defaults to True.
            draw_score (bool): Whether to draw the prediction scores
                of prediction categories. Defaults to True.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            rescale_factor (float, optional): Rescale the image by the rescale
                factor before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts
                arguments of :meth:`mmengine.Visualizer.draw_texts`.
                Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        if self.dataset_meta is not None:
            classes = classes or self.dataset_meta.get('classes', None)

        if resize is not None:
            h, w = image.shape[:2]
            if w < h:
                image = mmcv.imresize(image, (resize, resize * h // w))
            else:
                image = mmcv.imresize(image, (resize * w // h, resize))
        elif rescale_factor is not None:
            image = mmcv.imrescale(image, rescale_factor)

        texts = []
        self.set_image(image)

        if draw_gt and 'gt_label' in data_sample:
            idx = data_sample.gt_label.tolist()
            class_labels = [''] * len(idx)
            if classes is not None:
                class_labels = [f' ({classes[i]})' for i in idx]
            labels = [str(idx[i]) + class_labels[i] for i in range(len(idx))]
            prefix = 'Ground truth: '
            texts.append(prefix + ('\n' + ' ' * len(prefix)).join(labels))

        if draw_pred and 'pred_label' in data_sample:
            idx = data_sample.pred_label.tolist()
            score_labels = [''] * len(idx)
            class_labels = [''] * len(idx)
            if draw_score and 'pred_score' in data_sample:
                score_labels = [
                    f', {data_sample.pred_score[i].item():.2f}' for i in idx
                ]
            if classes is not None:
                class_labels = [f' ({classes[i]})' for i in idx]
            labels = [
                str(idx[i]) + score_labels[i] + class_labels[i]
                for i in range(len(idx))
            ]
            prefix = 'Prediction: '
            texts.append(prefix + ('\n' + ' ' * len(prefix)).join(labels))

        img_scale = get_adaptive_scale(image.shape[:2])
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }
        self.ax_save.text(
            img_scale * 5,
            img_scale * 5,
            '\n'.join(texts),
            **text_cfg,
        )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
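
    # A minimal usage sketch for ``visualize_cls`` (the image, labels, scores
    # and class names below are illustrative, not from a real model):
    #
    #     import numpy as np
    #     import torch
    #     from mmpretrain.structures import DataSample
    #     from mmpretrain.visualization import UniversalVisualizer
    #
    #     image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
    #     data_sample = DataSample()
    #     data_sample.set_gt_label(1)
    #     data_sample.set_pred_label(2)
    #     data_sample.set_pred_score(torch.tensor([0.1, 0.2, 0.7]))
    #
    #     vis = UniversalVisualizer()
    #     vis.visualize_cls(image, data_sample,
    #                       classes=['cat', 'dog', 'bird'],
    #                       out_file='vis_cls.png')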

    @master_only
    def visualize_image_retrieval(self,
                                  image: np.ndarray,
                                  data_sample: DataSample,
                                  prototype_dataset: BaseDataset,
                                  topk: int = 1,
                                  draw_score: bool = True,
                                  resize: Optional[int] = None,
                                  text_cfg: dict = dict(),
                                  show: bool = False,
                                  wait_time: float = 0,
                                  out_file: Optional[str] = None,
                                  name: Optional[str] = '',
                                  step: int = 0) -> np.ndarray:
        """Visualize image retrieval result.

        This method will draw the input image and the images retrieved from
        the prototype dataset.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            prototype_dataset (:obj:`BaseDataset`): The prototype dataset.
                It should have a `get_data_info` method and return a dict
                including `img_path`.
            topk (int): To visualize the topk matching items. Defaults to 1.
            draw_score (bool): Whether to draw the match scores of the
                retrieved images. Defaults to True.
            resize (int, optional): Resize the long edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        if resize is not None:
            image = mmcv.imrescale(image, (resize, resize))

        match_scores, indices = torch.topk(data_sample.pred_score, k=topk)

        figure = create_figure(margin=True)
        gs = figure.add_gridspec(2, topk)
        query_plot = figure.add_subplot(gs[0, :])
        query_plot.axis(False)
        query_plot.imshow(image)

        for k, (score, sample_idx) in enumerate(zip(match_scores, indices)):
            sample = prototype_dataset.get_data_info(sample_idx.item())
            value_image = mmcv.imread(sample['img_path'])[..., ::-1]
            value_plot = figure.add_subplot(gs[1, k])
            value_plot.axis(False)
            value_plot.imshow(value_image)
            if draw_score:
                value_plot.text(
                    5,
                    5,
                    f'{score:.2f}',
                    **text_cfg,
                )
        drawn_img = img_from_canvas(figure.canvas)
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
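
    # A minimal usage sketch for ``visualize_image_retrieval`` (assumes a
    # prototype dataset whose ``get_data_info(i)`` returns a dict with an
    # ``img_path`` key, and a ``pred_score`` with one similarity per
    # prototype image; ``similarities`` and ``query_image`` are illustrative):
    #
    #     data_sample = DataSample()
    #     data_sample.set_pred_score(similarities)  # shape (len(prototype),)
    #     vis.visualize_image_retrieval(
    #         query_image, data_sample, prototype_dataset, topk=3,
    #         out_file='retrieval.png')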

    def add_mask_to_image(
        self,
        image: np.ndarray,
        data_sample: DataSample,
        resize: Union[int, Tuple[int]] = 224,
        color: Union[str, Tuple[int]] = 'black',
        alpha: Union[int, float] = 0.8,
    ) -> np.ndarray:
        """Resize the image, overlay ``data_sample.mask`` on it as a binary
        mask and return the drawn image."""
        if isinstance(resize, int):
            resize = (resize, resize)

        image = mmcv.imresize(image, resize)
        self.set_image(image)

        if isinstance(data_sample.mask, np.ndarray):
            data_sample.mask = torch.tensor(data_sample.mask)
        mask = data_sample.mask.float()[None, None, ...]
        mask_ = F.interpolate(mask, image.shape[:2], mode='nearest')[0, 0]

        self.draw_binary_masks(mask_.bool(), colors=color, alphas=alpha)

        drawn_img = self.get_image()
        return drawn_img
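
    # A minimal usage sketch for ``add_mask_to_image`` (the random 14x14
    # patch mask is illustrative, e.g. an MAE-style mask):
    #
    #     image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
    #     data_sample = DataSample()
    #     data_sample.mask = torch.rand(14, 14) > 0.6
    #     masked_img = vis.add_mask_to_image(image, data_sample, resize=224)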

    @master_only
    def visualize_masked_image(self,
                               image: np.ndarray,
                               data_sample: DataSample,
                               resize: Union[int, Tuple[int]] = 224,
                               color: Union[str, Tuple[int]] = 'black',
                               alpha: Union[int, float] = 0.8,
                               show: bool = False,
                               wait_time: float = 0,
                               out_file: Optional[str] = None,
                               name: str = '',
                               step: int = 0) -> np.ndarray:
        """Visualize masked image.

        This method will draw an image with a binary mask.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            resize (int | Tuple[int]): Resize the input image to the specified
                shape. Defaults to 224.
            color (str | Tuple[int]): The color of the binary mask.
                Defaults to "black".
            alpha (int | float): The transparency of the mask.
                Defaults to 0.8.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        drawn_img = self.add_mask_to_image(
            image=image,
            data_sample=data_sample,
            resize=resize,
            color=color,
            alpha=alpha)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
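
    # ``visualize_masked_image`` wraps ``add_mask_to_image`` and additionally
    # shows or stores the result; a minimal sketch (reusing the names above):
    #
    #     vis.visualize_masked_image(image, data_sample, out_file='masked.png')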

    @master_only
    def visualize_image_caption(self,
                                image: np.ndarray,
                                data_sample: DataSample,
                                resize: Optional[int] = None,
                                text_cfg: dict = dict(),
                                show: bool = False,
                                wait_time: float = 0,
                                out_file: Optional[str] = None,
                                name: Optional[str] = '',
                                step: int = 0) -> np.ndarray:
        """Visualize image caption result.

        This method will draw the input image and its predicted caption.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        if resize is not None:
            h, w = image.shape[:2]
            if w < h:
                image = mmcv.imresize(image, (resize, resize * h // w))
            else:
                image = mmcv.imresize(image, (resize * w // h, resize))

        self.set_image(image)

        img_scale = get_adaptive_scale(image.shape[:2])
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }
        self.ax_save.text(
            img_scale * 5,
            img_scale * 5,
            data_sample.get('pred_caption'),
            wrap=True,
            **text_cfg,
        )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
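
    # A minimal usage sketch for ``visualize_image_caption`` (the caption
    # string is illustrative):
    #
    #     data_sample = DataSample()
    #     data_sample.pred_caption = 'a dog playing on the grass'
    #     vis.visualize_image_caption(image, data_sample, resize=384)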

    @master_only
    def visualize_vqa(self,
                      image: np.ndarray,
                      data_sample: DataSample,
                      resize: Optional[int] = None,
                      text_cfg: dict = dict(),
                      show: bool = False,
                      wait_time: float = 0,
                      out_file: Optional[str] = None,
                      name: Optional[str] = '',
                      step: int = 0) -> np.ndarray:
        """Visualize visual question answering result.

        This method will draw the input image, question and answer.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        if resize is not None:
            h, w = image.shape[:2]
            if w < h:
                image = mmcv.imresize(image, (resize, resize * h // w))
            else:
                image = mmcv.imresize(image, (resize * w // h, resize))

        self.set_image(image)

        img_scale = get_adaptive_scale(image.shape[:2])
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }
        text = (f'Q: {data_sample.get("question")}\n'
                f'A: {data_sample.get("pred_answer")}')
        self.ax_save.text(
            img_scale * 5,
            img_scale * 5,
            text,
            wrap=True,
            **text_cfg,
        )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
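
    # A minimal usage sketch for ``visualize_vqa`` (question and answer
    # strings are illustrative):
    #
    #     data_sample = DataSample()
    #     data_sample.question = 'What is the dog doing?'
    #     data_sample.pred_answer = 'playing'
    #     vis.visualize_vqa(image, data_sample, resize=384)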

    @master_only
    def visualize_visual_grounding(self,
                                   image: np.ndarray,
                                   data_sample: DataSample,
                                   resize: Optional[int] = None,
                                   text_cfg: dict = dict(),
                                   show: bool = False,
                                   wait_time: float = 0,
                                   out_file: Optional[str] = None,
                                   name: Optional[str] = '',
                                   line_width: Union[int, float] = 3,
                                   bbox_color: Union[str, tuple] = 'green',
                                   step: int = 0) -> np.ndarray:
        """Visualize visual grounding result.

        This method will draw the input image, the bounding boxes and the
        referring text.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            line_width (int | float): The line width of the bounding boxes.
                Defaults to 3.
            bbox_color (str | tuple): The edge color of the predicted
                bounding boxes. Defaults to 'green'.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        gt_bboxes = data_sample.get('gt_bboxes')
        pred_bboxes = data_sample.get('pred_bboxes')

        if resize is not None:
            h, w = image.shape[:2]
            if w < h:
                image, w_scale, h_scale = mmcv.imresize(
                    image, (resize, resize * h // w), return_scale=True)
            else:
                image, w_scale, h_scale = mmcv.imresize(
                    image, (resize * w // h, resize), return_scale=True)
            pred_bboxes[:, ::2] *= w_scale
            pred_bboxes[:, 1::2] *= h_scale
            if gt_bboxes is not None:
                gt_bboxes[:, ::2] *= w_scale
                gt_bboxes[:, 1::2] *= h_scale

        self.set_image(image)
        # Avoid the line-width limit in the base classes.
        self._default_font_size = 1e3
        self.draw_bboxes(
            pred_bboxes, line_widths=line_width, edge_colors=bbox_color)
        if gt_bboxes is not None:
            self.draw_bboxes(
                gt_bboxes, line_widths=line_width, edge_colors='blue')

        img_scale = get_adaptive_scale(image.shape[:2])
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }

        text_positions = pred_bboxes[:, :2] + line_width
        for i in range(pred_bboxes.size(0)):
            self.ax_save.text(
                text_positions[i, 0] + line_width,
                text_positions[i, 1] + line_width,
                data_sample.get('text'),
                **text_cfg,
            )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
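
    # A minimal usage sketch for ``visualize_visual_grounding`` (the box and
    # referring text are illustrative; bboxes are (x1, y1, x2, y2) tensors of
    # shape (N, 4)):
    #
    #     data_sample = DataSample()
    #     data_sample.text = 'the dog on the left'
    #     data_sample.pred_bboxes = torch.tensor([[12., 20., 160., 200.]])
    #     vis.visualize_visual_grounding(image, data_sample, resize=448)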

    @master_only
    def visualize_t2i_retrieval(self,
                                text: str,
                                data_sample: DataSample,
                                prototype_dataset: BaseDataset,
                                topk: int = 1,
                                draw_score: bool = True,
                                text_cfg: dict = dict(),
                                fig_cfg: dict = dict(),
                                show: bool = False,
                                wait_time: float = 0,
                                out_file: Optional[str] = None,
                                name: Optional[str] = '',
                                step: int = 0) -> np.ndarray:
        """Visualize Text-To-Image retrieval result.

        This method will draw the input text and the images retrieved from
        the prototype dataset.

        Args:
            text (str): The query text, drawn as the figure title.
            data_sample (:obj:`DataSample`): The annotation of the query.
            prototype_dataset (:obj:`BaseDataset`): The prototype dataset.
                It should have a `get_data_info` method and return a dict
                including `img_path`.
            topk (int): To visualize the topk matching items. Defaults to 1.
            draw_score (bool): Whether to draw the match scores of the
                retrieved images. Defaults to True.
            text_cfg (dict): Extra text setting, which accepts arguments of
                :func:`plt.text`. Defaults to an empty dict.
            fig_cfg (dict): Extra figure setting, which accepts arguments of
                :func:`plt.Figure`. Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        text_cfg = {**self.DEFAULT_TEXT_CFG, **text_cfg}

        match_scores, indices = torch.topk(data_sample.pred_score, k=topk)

        figure = create_figure(margin=True, **fig_cfg)
        figure.suptitle(text)
        gs = figure.add_gridspec(1, topk)

        for k, (score, sample_idx) in enumerate(zip(match_scores, indices)):
            sample = prototype_dataset.get_data_info(sample_idx.item())
            value_image = mmcv.imread(sample['img_path'])[..., ::-1]
            value_plot = figure.add_subplot(gs[0, k])
            value_plot.axis(False)
            value_plot.imshow(value_image)
            if draw_score:
                value_plot.text(
                    5,
                    5,
                    f'{score:.2f}',
                    **text_cfg,
                )
        drawn_img = img_from_canvas(figure.canvas)
        self.set_image(drawn_img)

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
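
    # A minimal usage sketch for ``visualize_t2i_retrieval`` (assumes
    # ``pred_score`` holds one similarity per image in the prototype dataset;
    # ``text_to_image_scores`` and ``prototype_dataset`` are illustrative):
    #
    #     data_sample = DataSample()
    #     data_sample.set_pred_score(text_to_image_scores)
    #     vis.visualize_t2i_retrieval(
    #         'a photo of a cat', data_sample, prototype_dataset, topk=3)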

    @master_only
    def visualize_i2t_retrieval(self,
                                image: np.ndarray,
                                data_sample: DataSample,
                                prototype_dataset: Sequence[str],
                                topk: int = 1,
                                draw_score: bool = True,
                                resize: Optional[int] = None,
                                text_cfg: dict = dict(),
                                show: bool = False,
                                wait_time: float = 0,
                                out_file: Optional[str] = None,
                                name: str = '',
                                step: int = 0) -> np.ndarray:
        """Visualize Image-To-Text retrieval result.

        This method will draw the input image and the texts retrieved from
        the prototype dataset.

        Args:
            image (np.ndarray): The image to draw. The format should be RGB.
            data_sample (:obj:`DataSample`): The annotation of the image.
            prototype_dataset (Sequence[str]): The prototype dataset.
                It should be a list of texts.
            topk (int): To visualize the topk matching items. Defaults to 1.
            draw_score (bool): Whether to draw the match scores of the
                retrieved texts. Defaults to True.
            resize (int, optional): Resize the short edge of the image to the
                specified length before visualization. Defaults to None.
            text_cfg (dict): Extra text setting, which accepts
                arguments of :meth:`mmengine.Visualizer.draw_texts`.
                Defaults to an empty dict.
            show (bool): Whether to display the drawn image in a window,
                please confirm you are able to access the graphical interface.
                Defaults to False.
            wait_time (float): The display time (s). Defaults to 0, which
                means "forever".
            out_file (str, optional): Extra path to save the visualization
                result. If specified, the visualizer will only save the result
                image to the out_file and ignore its storage backends.
                Defaults to None.
            name (str): The image identifier. It's useful when using the
                storage backends of the visualizer to save or display the
                image. Defaults to an empty string.
            step (int): The global step value. It's useful to record a
                series of visualization results for the same image with the
                storage backends. Defaults to 0.

        Returns:
            np.ndarray: The visualization image.
        """
        if resize is not None:
            h, w = image.shape[:2]
            if w < h:
                image = mmcv.imresize(image, (resize, resize * h // w))
            else:
                image = mmcv.imresize(image, (resize * w // h, resize))

        self.set_image(image)

        match_scores, indices = torch.topk(data_sample.pred_score, k=topk)

        texts = []
        for score, sample_idx in zip(match_scores, indices):
            text = prototype_dataset[sample_idx.item()]
            if draw_score:
                text = f'{score:.2f} ' + text
            texts.append(text)

        img_scale = get_adaptive_scale(image.shape[:2])
        text_cfg = {
            'size': int(img_scale * 7),
            **self.DEFAULT_TEXT_CFG,
            **text_cfg,
        }
        self.ax_save.text(
            img_scale * 5,
            img_scale * 5,
            '\n'.join(texts),
            **text_cfg,
        )
        drawn_img = self.get_image()

        if show:
            self.show(drawn_img, win_name=name, wait_time=wait_time)

        if out_file is not None:
            # save the image to the target file instead of vis_backends
            mmcv.imwrite(drawn_img[..., ::-1], out_file)
        else:
            self.add_image(name, drawn_img, step=step)

        return drawn_img
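
    # A minimal usage sketch for ``visualize_i2t_retrieval`` (the candidate
    # texts and scores are illustrative):
    #
    #     texts = ['a photo of a cat', 'a photo of a dog']
    #     data_sample = DataSample()
    #     data_sample.set_pred_score(torch.tensor([0.2, 0.8]))
    #     vis.visualize_i2t_retrieval(image, data_sample, texts, topk=2)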