File size: 1,999 Bytes
6fc683c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import base64
import io
import random
import os

# import pandas as pd
from PIL import Image
from tqdm import tqdm
import json
import string  

import pdb

# Example:
# json_ann['questions'][0]
# {
#   'answer': 'A', 
#   'choice_a': 'One', 
#   'choice_b': 'Two', 
#   'choice_c': 'Three', 
#   'choice_d': 'Four', 
#   'data_id': '1454426_2591111986', 
#   'data_type': 'image', 
#   'question': 'How many towels are in the image?', 
#   'question_id': '101669', 
#   'question_type_id': 5
# }

def convert_json_to_txt(json_path, image_path, txt_path, answer_path):
    json_ann = json.load(open(json_path, 'rb'))
    
    # pdb.set_trace()

    with open(txt_path, 'w', encoding='utf-8') as f, open(answer_path, 'w', encoding='utf-8') as fa:
        for index, item in tqdm(enumerate(json_ann['questions'])):
            if item['data_type'] != 'image':
                continue
            question = item['question'].replace('\n', ' ')
            image_file_path = os.path.join(image_path, item['data_id'])  

            options = {
                'A': item['choice_a'],
                'B': item['choice_b'],
                'C': item['choice_c'],
                'D': item['choice_d'],
            }
            for key, choice in options.items():
                choice = choice.replace('\n', ' ').strip()
                sentence = f'[image]{image_file_path}<tab>Question: {question} Answer: {choice}'  
                f.write(sentence + '\n')
                fa.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(index, question, choice, item['answer'], item['question_id'], item['question_type_id']))  

if __name__ == '__main__':
    save_dir = '/path/to/data'
    convert_json_to_txt(
                       f'{save_dir}/SEED-Bench/SEED-Bench.json', 
                       f'{save_dir}/SEED-Bench/SEED-Bench-image', 
                       f'{save_dir}/SEED-Bench/seed_bench_pplformat.txt',
                       f'{save_dir}/SEED-Bench/seed_bench_pplformat.answer'
                       )