import json
import numpy as np
from tqdm import tqdm
from pathlib import Path

from videollava.constants import DEFAULT_VIDEO_TOKEN

from infer_utils import run_inference_single


def run_cdvqa_inference(
        model,
        dataset_path,
        processor,
        tokenizer,
        conv_mode,
        use_video_data=False,
        open_prompt=None,
        repeat_frames=None,
        prompt_strategy="interleave",
        chronological_prefix=True,
        data_frac=1,
        data_size=None,
        delete_system_prompt=False,
        last_image=False,
        start_ind=None,
        end_ind=None,
        print_prompt=False,
        **kwargs
    ):
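    """Run CDVQA-style change-detection VQA inference on pairs of satellite images.

    For each active test image, every active question is turned into a prompt
    (image-pair description, the video token placeholder, the question text, and a
    question-type-specific answer hint) and passed to `run_inference_single`.

    Returns a dict keyed by "<image_id>_<question_id>" containing the predicted
    answer, the ground-truth answer, and the question type.
    """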
    for k, v in kwargs.items():
        print("WARNING: Unused argument:", k, v)

    dataset_path = Path(dataset_path)
    data_dir = dataset_path.parent
    questions_path = data_dir / dataset_path.name.replace(".json", "_questions.json")
    answers_path = data_dir / dataset_path.name.replace(".json", "_answers.json")
    images_path = data_dir / dataset_path.name.replace(".json", "_images.json")

    with open(questions_path) as json_data:
        questionsJSON = json.load(json_data)

    with open(answers_path) as json_data:
        answersJSON = json.load(json_data)

    with open(images_path) as json_data:
        imagesJSON = json.load(json_data)

    # Get the image IDs of test images
    images_ids = [img['id'] for img in imagesJSON['images'] if img['active']]

    # Optionally restrict inference to a random subset of the test images
    if data_size is not None:
        data_size = min(data_size, len(images_ids))
        idx = np.random.choice(len(images_ids), data_size, replace=False)
        images_ids = [images_ids[i] for i in idx]
    elif data_frac < 1:
        idx = np.random.choice(len(images_ids), int(len(images_ids) * data_frac), replace=False)
        images_ids = [images_ids[i] for i in idx]

    if start_ind is not None and end_ind is not None:
        print("Subsetting data from index", start_ind, "to", end_ind)
        images_ids = images_ids[start_ind:end_ind]
    elif start_ind is not None:
        print("Subsetting data from index", start_ind, "to end")
        images_ids = images_ids[start_ind:]
    elif end_ind is not None:
        print("Subsetting data from start to index", end_ind)
        images_ids = images_ids[:end_ind]

    # Store all predicted answers
    answers = {}
    # Read image corresponding to each ID and get its associated question and answer
    for id in tqdm(images_ids):
        file_name = imagesJSON['images'][id]['file_name']

        image_paths = [
            str(data_dir / "second_dataset" / "im1" / file_name),
            str(data_dir / "second_dataset" / "im2" / file_name),
        ]

        for questionid in imagesJSON['images'][id]['questions_ids']:
            question = questionsJSON['questions'][questionid]
            if not question['active']:
                continue
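            # Build the prompt: image-pair description, the video token placeholder,
            # the question itself, then an answer-space hint chosen by question type.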
            inp = "This is a pair of satellite images capturing the same location at different times: "
            inp = inp + DEFAULT_VIDEO_TOKEN + '\n'
            inp = inp + question["question"]
            type_str = question["type"]
            answer_str = answersJSON['answers'][question["answers_ids"][0]]['answer']

            if type_str in ["change_or_not", "increase_or_not", "decrease_or_not"]:
                inp = inp + " Answer with yes or no."

            elif type_str == "change_ratio":
                inp = inp + " Choose from one of the following options: 0, 0_to_10, 10_to_20, 20_to_30, 30_to_40, 40_to_50, 50_to_60, 60_to_70, 70_to_80, 80_to_90, 90_to_100."

            elif type_str == "change_ratio_types":
                inp = inp + " Choose from one of the following options: 0, 0_to_10, 10_to_20, 20_to_30, 30_to_40, 40_to_50, 50_to_60, 60_to_70."

            else: # smallest_change, largest_change, change_to_what
                inp = inp + " Choose from one of the following options: buildings, low_vegetation, nonvegetated ground surface, playgrounds, trees, water."
                answer_str = answer_str.replace("NVG_surface", "nonvegetated ground surface")    

            outputs = run_inference_single(
                model=model,
                processor=processor,
                tokenizer=tokenizer,
                conv_mode=conv_mode,
                inp=inp,
                image_paths=image_paths,
                metadata=None,
                use_video_data=use_video_data,
                repeat_frames=repeat_frames,
                prompt_strategy=prompt_strategy,
                chronological_prefix=chronological_prefix,
                delete_system_prompt=delete_system_prompt,
                last_image=last_image,
                print_prompt=print_prompt
            )

            answers[f"{id}_{questionid}"] = {
                "predicted": outputs,
                "ground_truth": answer_str,
                "task": type_str
            }

    return answers
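

# Example usage (illustrative sketch; the model/processor/tokenizer objects, the
# conversation mode, and the dataset path below are assumptions, not fixed by
# this module):
#
#     answers = run_cdvqa_inference(
#         model=model,
#         dataset_path="path/to/CDVQA_dataset.json",
#         processor=processor,
#         tokenizer=tokenizer,
#         conv_mode="llava_v1",
#         data_frac=0.1,
#     )
#     with open("cdvqa_predictions.json", "w") as f:
#         json.dump(answers, f, indent=2)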