"""Build a pointing-prompt JSONL file from the kvasir-points dataset.

For every record of the ``train`` split this script picks a display label and
a random "pointing" prompt, checks that the referenced image can be read, and
writes one chat-style JSON object per line to ``OUTPUT_PATH``.
"""
import json
import os
import random

# features: ['image_data', 'image_sha256', 'points', 'count', 'label',
#            'collection_method', 'classification', 'organ']
DATASET_NAME = "SushantGautam/kvasir-points"
# Directory holding one ``<image_sha256>.png`` file per dataset record.
IMAGE_DIR = "/home/sushant/D1/MIUA/kvasir-format/images/"
# NOTE(review): the file name says "valid" but the ``train`` split is read.
OUTPUT_PATH = "kvasir_valid.jsonl"
SEED = 42

# Prompt templates; ``{label}`` is substituted with the display label.
# Duplicated entries are kept on purpose: removing them would change both the
# sampling weights and the seeded random sequence.
GENERAL_PROMPTS_V1 = {
    "pointing": [
        "Point to {label}\nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\"",
        "Point to any {label} in the image",
        "Point to any {label} in the image.",
        "Point: Where are the {label}",
        "Show me where the {label} are",
        "Can you show me where the {label} are?",
        "Show me where the {label} are",
        "Show me where a {label} is",
        "Show me where a {label} is.",
        "If there are any {label} in the image? Show me where they are.",
        "Where are the {label}?",
        "Generate a list of points showing where the {label} are.",
        "Find the \"{label}\".",
        "Find a \"{label}\".",
        "Locate all {label}.",
        "Locate an {label}.",
        "Locate a {label}.",
        "Locate every {label}.",
        "Locate {label}.",
        "Locate the {label}.",
        "Object: {label}\nInstruction: Point to the object.",
        "find {label}",
        "find {label}.",
        "Point to every {label}",
        "find any {label} in the picture",
        "Find the {label}",
        "Find any {label}",
        "Point to a {label}",
        "Point to an {label}",
        "Look for {label} in the image and show me where they are.",
        "Help me find an object in the image by pointing to them.\nObject: {label}.",
        "I am looking for {label}, where can they be found in the image?",
        "Can you see any {label} in the image? Point to them.",
        "Point out each {label} in the image.",
        "Point out every {label} in the image.",
        "Point to the {label} in the image.",
        "Locate each {label} in the image.",
        "Can you point out all {label} in this image?",
        "Please find {label} and show me where they are.",
        "If there are any {label} present, indicate their positions.",
        "If there is a {label} present, indicate its positions.",
        "show me all visible {label}",
    ],
    "point_count": [
        "How many {label} are there?",
        "How many {label}?",
        "How many {label}.",
        "how many {label}.",
        "how many {label}?",
        "How many {label} are there in the image?",
        "Tell me how many {label} there are",
        "Tell me how many {label} there are and point to them.",
        "how many {label}",
        "Tell me where each {label} is.",
        "Tell me how many {label} are in the image",
        "count {label}",
        "count every {label}",
        "count each {label}",
        "count {label}.",
        "Count the {label}.",
        "How many {label} do you see?",
        "How many {label} are visible?",
        "Count all the {label}",
        "how mmny {label}?",
        "Count every {label} in the picture.",
        "Count all the {label}",
        "Count each {label}",
        "Point to and count the {label} in the picture.",
        "Point and count {label}",
        "Point to every {label}",
        "Locate the {label} and count them",
        "Locate every {label} and count them",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
        "Point at {label} and then tell me the count.",
        "What is the total number of {label} in the image?",
        "In all the picture, how many {label} are there?",
        "Point at the {label} and then count them.",
        "Point to all the visible {label} output the total count.",
        "Point to all the {label} visible and output the total count. \nPlease say 'This isn't in the image.' if it is not in the image.",
        "Point to all occurrences of \"{label}\" and output the total count.",
        "Show me where the {label} are and output the total count.",
        "Where are the {label}? How many are there?",
        "Generate list of points showing where the {label} are and output the total count.",
        "Object: {label}\nInstruction: Point to the object and output the total count.",
        "find any {label} in the picture and output the total count.",
        "Can you see any {label} in the image? Point to them and output the total count.",
        "Can you point out all {label} in this image? How many are there?",
        "If there are any {label} present, indicate their positions and output the total count.",
        "How many {label} are there in the image? Point to them and output the total count.",
        "How many {label} are there in the image?",
        "Give me the count of {label} in the image.",
        "How many {label} are visible in the image?",
        "How many {label} are there?",
        "In the image, how many {label} are there?",
        "Can you count the number of {label} in the image?",
        "Can you count every {label} in the picture?",
        "Can you see any {label} in the image? How many are there?",
        "Are there any {label} in the image? How many are there?",
        "If you see any {label} in the image, give me the count. Otherwise, say 'This isn't in the image.'",
        "Object: {label}\nInstruction: How many are there?",
    ],
    "count_then_point": [
        "Count the {label} in the image, then point to them.",
        "How many {label} are there? Point to them.",
        "Count every {label} in the picture, then point to them.",
        "Locate the {label} and count them, then point to them.",
        "Find all the {label}. How many are there? Point to them.",
        "Find each {label}. How many are there? Point to them.",
        "Point to and count the {label} in the picture.",
    ],
    "only_count": [
        "Count the {label} in the image.",
        "How many {label} are there?",
        "Count every {label} in the picture.",
        "Locate the {label} and count them.",
        "Find all the {label}. How many are there?",
        "Find each {label}. How many are there?",
    ],
}

# Dataset label -> phrases it may be rendered as in prompts and responses.
# Single-element lists are still sampled with ``random.choice`` so that the
# seeded random sequence stays the same for every mapped label.
LABEL_VARIANTS = {
    "normal": ["normal sperms", "sperms"],
    "pinhead": ["pinhead sperms", "pinheads"],
    # Was " sperm clusters": the stray leading space leaked into the prompt
    # (double space) and into the assistant response.
    "cluster": ["clusters", "sperm clusters"],
    "instrument": ["instruments"],
    "polyps": ["polyps"],
}


def molmo_coords(coords, w, h):
    """Return ``coords`` as percentages of the width ``w`` and height ``h``.

    Args:
        coords: Sequence whose first two items are the x and y values.
        w: Value the x coordinate is divided by (image width).
        h: Value the y coordinate is divided by (image height).

    Returns:
        A ``(x_percent, y_percent)`` tuple.
    """
    return coords[0] / w * 100, coords[1] / h * 100


def _display_label(label):
    """Return a randomly chosen phrase for ``label``, or ``label`` if unmapped."""
    variants = LABEL_VARIANTS.get(label)
    if variants is None:
        return label
    return random.choice(variants)


def build_entry(label, image_path):
    """Build one chat-style training entry for a dataset record.

    The display label is drawn first and the prompt template second; this
    order determines the output for a given random seed.

    Args:
        label: Raw ``label`` value of the dataset record.
        image_path: Path stored in the entry's ``images`` list.

    Returns:
        A dict with a ``messages`` list (user prompt, assistant response) and
        an ``images`` list holding ``image_path``.
    """
    shown = _display_label(label)
    prompt = random.choice(GENERAL_PROMPTS_V1["pointing"]).format(label=shown)
    # NOTE(review): the assistant response is only the label text; the
    # record's point coordinates are not emitted. If they are added later via
    # ``molmo_coords``, pass the size as (w, h), not (h, w).
    return {
        "messages": [
            {"role": "user", "content": "<|image|> " + prompt},
            {"role": "assistant", "content": str(shown)},
        ],
        "images": [image_path],
    }


def main():
    """Generate ``OUTPUT_PATH`` from the ``train`` split of the dataset.

    Raises:
        OSError: If an image referenced by a record cannot be read.
    """
    # Third-party dependencies are imported here so the helpers above can be
    # imported without them being installed.
    import cv2
    from datasets import load_dataset

    ds = load_dataset(DATASET_NAME)
    random.seed(SEED)

    entries = []
    for data in ds["train"]:
        print(data)
        image_name = os.path.join(IMAGE_DIR, data["image_sha256"] + ".png")
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than on a later attribute
        # access.
        if cv2.imread(image_name) is None:
            raise OSError(f"image is missing or unreadable: {image_name!r}")
        entry = build_entry(data["label"], image_name)
        print(entry)
        entries.append(entry)

    # Written only after every record succeeded, so a failure part-way
    # through does not leave a truncated file behind.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in entries)


if __name__ == "__main__":
    main()