import os import io import pandas as pd import numpy as np import string from uuid import uuid4 import os.path as osp import base64 from PIL import Image from .file import load, dump Image.MAX_IMAGE_PIXELS = 1e9 def mmqa_display(question, target_size=512): question = {k.lower(): v for k, v in question.items()} keys = list(question.keys()) keys = [k for k in keys if k not in ['index', 'image']] images = question['image'] if isinstance(images, str): images = [images] idx = question.pop('index', 'XXX') print(f'INDEX: {idx}') for im in images: image = decode_base64_to_image(im, target_size=target_size) display(image) # noqa: F821 for k in keys: try: if not pd.isna(question[k]): print(f'{k.upper()}. {question[k]}') except ValueError: if False in pd.isna(question[k]): print(f'{k.upper()}. {question[k]}') def encode_image_to_base64(img, target_size=-1): # if target_size == -1, will not do resizing # else, will set the max_size ot (target_size, target_size) if img.mode in ('RGBA', 'P'): img = img.convert('RGB') tmp = osp.join('/tmp', str(uuid4()) + '.jpg') if target_size > 0: img.thumbnail((target_size, target_size)) img.save(tmp) with open(tmp, 'rb') as image_file: image_data = image_file.read() ret = base64.b64encode(image_data).decode('utf-8') os.remove(tmp) return ret def encode_image_file_to_base64(image_path, target_size=-1): image = Image.open(image_path) return encode_image_to_base64(image, target_size=target_size) def decode_base64_to_image(base64_string, target_size=-1): image_data = base64.b64decode(base64_string) image = Image.open(io.BytesIO(image_data)) if image.mode in ('RGBA', 'P'): image = image.convert('RGB') if target_size > 0: image.thumbnail((target_size, target_size)) return image def decode_base64_to_image_file(base64_string, image_path, target_size=-1): image = decode_base64_to_image(base64_string, target_size=target_size) image.save(image_path) def build_option_str(option_dict): s = 'There are several options: \n' for c, content in option_dict.items(): if not pd.isna(content): s += f'{c}. {content}\n' return s def isimg(s): return osp.exists(s) or s.startswith('http') def read_ok(img_path): if not osp.exists(img_path): return False try: im = Image.open(img_path) assert im.size[0] > 0 and im.size[1] > 0 return True except: return False def gpt_key_set(): openai_key = os.environ.get('OPENAI_API_KEY', None) return isinstance(openai_key, str) and openai_key.startswith('sk-') def apiok(wrapper): s = wrapper.generate('Hello!') return wrapper.fail_msg not in s def circular_pred(df, extract_func=None): if extract_func is None: extract_func = lambda x: x # noqa: E731 df = df.sort_values('index') from vlmeval.utils import can_infer_option shift = int(1e6) choices = [extract_func(x) for x in df['prediction']] pred_map = {i: c for i, c in zip(df['index'], choices)} flag_map = {i: True for i in pred_map if i < 1e6} valid_map = {i: True for i in pred_map if i < 1e6} for i in df['index']: if i >= shift and pred_map[i] and pred_map[i - shift]: if ( pred_map[i] not in list(string.ascii_uppercase) or # noqa: W504 pred_map[i - shift] not in list(string.ascii_uppercase) ): valid_map[i % shift] = False continue if (ord(pred_map[i]) - ord(pred_map[i - shift])) % 4 == 1: continue else: flag_map[i % shift] = False flag_map = {k: v for k, v in flag_map.items() if valid_map[k]} flags = list(flag_map.values()) return np.mean(flags)