SushantGautam committed on
Commit
1deb673
·
verified ·
1 Parent(s): 4d49145

Upload create_datasetJSON.py

Browse files
Files changed (1) hide show
  1. create_datasetJSON.py +190 -0
create_datasetJSON.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os
import random

import cv2
from datasets import load_dataset
ds = load_dataset("SushantGautam/kvasir-points")
# features: ['image_data', 'image_sha256', 'points', 'count', 'label', 'collection_method', 'classification', 'organ'],

# Fixed seed so that the random label phrasings and prompt templates picked
# further down are the same on every run.
random.seed(42)


# Prompt templates grouped by task style. Every template carries a "{label}"
# placeholder that is filled in with str.format(label=...).
#
# NOTE(review): several templates are duplicated and a few contain typos
# (e.g. "how mmny"). They are reproduced verbatim on purpose: templates are
# drawn with random.choice under a fixed seed, so adding, removing or
# reordering entries changes which prompt each record gets. Confirm whether
# the duplicates/typos are intentional before cleaning them up.
GENERAL_PROMPTS_V1 = {
    "pointing": [
        "Point to {label}\nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\"",
        "Point to any {label} in the image",
        "Point to any {label} in the image.",
        "Point: Where are the {label}",
        "Show me where the {label} are",
        "Can you show me where the {label} are?",
        "Show me where the {label} are",
        "Show me where a {label} is",
        "Show me where a {label} is.",
        "If there are any {label} in the image? Show me where they are.",
        "Where are the {label}?",
        "Generate a list of points showing where the {label} are.",
        "Find the \"{label}\".",
        "Find a \"{label}\".",
        "Locate all {label}.",
        "Locate an {label}.",
        "Locate a {label}.",
        "Locate every {label}.",
        "Locate {label}.",
        "Locate the {label}.",
        "Object: {label}\nInstruction: Point to the object.",
        "find {label}",
        "find {label}.",
        "Point to every {label}",
        "find any {label} in the picture",
        "Find the {label}",
        "Find any {label}",
        "Point to a {label}",
        "Point to an {label}",
        "Look for {label} in the image and show me where they are.",
        "Help me find an object in the image by pointing to them.\nObject: {label}.",
        "I am looking for {label}, where can they be found in the image?",
        "Can you see any {label} in the image? Point to them.",
        "Point out each {label} in the image.",
        "Point out every {label} in the image.",
        "Point to the {label} in the image.",
        "Locate each {label} in the image.",
        "Can you point out all {label} in this image?",
        "Please find {label} and show me where they are.",
        "If there are any {label} present, indicate their positions.",
        "If there is a {label} present, indicate its positions.",
        "show me all visible {label}",
    ],
    "point_count": [
        "How many {label} are there?",
        "How many {label}?",
        "How many {label}.",
        "how many {label}.",
        "how many {label}?",
        "How many {label} are there in the image?",
        "Tell me how many {label} there are",
        "Tell me how many {label} there are and point to them.",
        "how many {label}",
        "Tell me where each {label} is.",
        "Tell me how many {label} are in the image",
        "count {label}",
        "count every {label}",
        "count each {label}",
        "count {label}.",
        "Count the {label}.",
        "How many {label} do you see?",
        "How many {label} are visible?",
        "Count all the {label}",
        "how mmny {label}?",
        "Count every {label} in the picture.",
        "Count all the {label}",
        "Count each {label}",
        "Point to and count the {label} in the picture.",
        "Point and count {label}",
        "Point to every {label}",
        "Locate the {label} and count them",
        "Locate every {label} and count them",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
        "Point at {label} and then tell me the count.",
        "What is the total number of {label} in the image?",
        "In all the picture, how many {label} are there?",
        "Point at the {label} and then count them.",
        "Point to all the visible {label} output the total count.",
        "Point to all the {label} visible and output the total count. \nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\" and output the total count.",
        "Show me where the {label} are and output the total count.",
        "Where are the {label}? How many are there?",
        "Generate list of points showing where the {label} are and output the total count.",
        "Object: {label}\nInstruction: Point to the object and output the total count.",
        "find any {label} in the picture and output the total count.",
        "Can you see any {label} in the image? Point to them and output the total count.",
        "Can you point out all {label} in this image? How many are there?",
        "If there are any {label} present, indicate their positions and output the total count.",
        "How many {label} are there in the image? Point to them and output the total count.",
        "How many {label} are there in the image?",
        "Give me the count of {label} in the image.",
        "How many {label} are visible in the image?",
        "How many {label} are there?",
        "In the image, how many {label} are there?",
        "Can you count the number of {label} in the image?",
        "Can you count every {label} in the picture?",
        "Can you see any {label} in the image? How many are there?",
        "Are there any {label} in the image? How many are there?",
        "If you see any {label} in the image, give me the count. Otherwise, say 'This isn't in the image.'",
        "Object: {label}\nInstruction: How many are there?",
    ],
    "count_then_point": [
        "Count the {label} in the image, then point to them.",
        "How many {label} are there? Point to them.",
        "Count every {label} in the picture, then point to them.",
        "Locate the {label} and count them, then point to them.",
        "Find all the {label}. How many are there? Point to them.",
        "Find each {label}. How many are there? Point to them.",
        "Point to and count the {label} in the picture.",
    ],
    "only_count": [
        "Count the {label} in the image.",
        "How many {label} are there?",
        "Count every {label} in the picture.",
        "Locate the {label} and count them.",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
    ],
}

# Collects one chat-style record (a dict) per dataset row; the list is
# serialized to a JSON Lines file at the end of the script.
jsonl = []


def molmo_coords(coords, w, h):
    """Express an ``(x, y)`` position as percentages of ``w`` and ``h``.

    Args:
        coords: Indexable whose items 0 and 1 are the x and y position.
        w: Extent that ``coords[0]`` is measured against (image width).
        h: Extent that ``coords[1]`` is measured against (image height).

    Returns:
        The tuple ``(coords[0] / w * 100, coords[1] / h * 100)``.

    Raises:
        ZeroDivisionError: If ``w`` or ``h`` is zero.
    """
    x, y = coords[0], coords[1]
    return x / w * 100, y / h * 100


# Directory that is expected to already hold one "<image_sha256>.png" file
# per dataset row.
_IMAGE_DIR = "/home/sushant/D1/MIUA/kvasir-format/images/"

# Dataset label -> phrasings to sample from for the prompt and the answer.
# Labels without an entry are used verbatim.
_LABEL_ALIASES = {
    "normal": ["normal sperms", "sperms"],
    "pinhead": ["pinhead sperms", "pinheads"],
    # The second alias used to start with a stray space (" sperm clusters"),
    # which leaked a double space into prompts and into the alt attribute.
    "cluster": ["clusters", "sperm clusters"],
    "instrument": ["instruments"],
    "polyps": ["polyps"],
}


def _points_markup(points, label):
    """Return the ``<points ...>`` answer string for *points* tagged *label*.

    Each ``(x, y)`` pair becomes ``xN="..." yN="..."`` with N counted from 1
    and both values formatted to one decimal place.
    """
    attrs = " ".join(
        f'x{n}="{x:.1f}" y{n}="{y:.1f}"'
        for n, (x, y) in enumerate(points, start=1)
    )
    return f'<points {attrs} alt="{label}">{label}</points>'


for data in ds["train"]:
    print(data)
    points = data["points"][0]

    image_name = _IMAGE_DIR + data["image_sha256"] + ".png"
    # cv2.imread signals an unreadable or missing file by returning None;
    # fail with the offending path instead of an AttributeError on `.shape`.
    if cv2.imread(image_name) is None:
        raise FileNotFoundError(f"could not read image: {image_name}")

    # The points are written exactly as stored in the dataset; molmo_coords()
    # is not applied, so the image size is not needed here.
    # NOTE(review): confirm the stored points are already in the coordinate
    # range the target model expects. If the rescaling is re-enabled, pass
    # width before height: molmo_coords(p, w, h).
    mol_points = points

    label = data["label"]
    aliases = _LABEL_ALIASES.get(label)
    if aliases is not None:
        # Draw even from single-item lists so the sequence of random numbers
        # consumed under the fixed seed stays the same.
        label = random.choice(aliases)

    answer = _points_markup(mol_points, label)
    question = "<|image|> " + random.choice(GENERAL_PROMPTS_V1["pointing"]).format(label=label)

    entry = {
        "messages": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ],
        "images": [image_name],
    }
    print(entry)

    jsonl.append(entry)

# Write the collected records as JSON Lines: one JSON object per line.
# NOTE(review): the records are built from the dataset's "train" split, yet
# the output file is named "kvasir_valid.jsonl" — confirm this is intended.
with open("kvasir_valid.jsonl", "w", encoding="utf-8") as f:
    f.writelines(json.dumps(entry) + "\n" for entry in jsonl)