import type { TaskDataCustom } from "..";

const taskData: TaskDataCustom = {
	datasets: [
		{
			description: "Dataset of 12 million image-text pairs collected from Reddit.",
			id: "red_caps",
		},
		{
			description: "Dataset of 3.3 million image-caption pairs collected by Google.",
			id: "datasets/conceptual_captions",
		},
	],
	demo: {
		inputs: [
			{
				filename: "savanna.jpg",
				type: "img",
			},
		],
		outputs: [
			{
				label: "Detailed description",
				content: "a herd of giraffes and zebras grazing in a field",
				type: "text",
			},
		],
	},
	metrics: [],
	models: [
		{
			description: "A robust image captioning model.",
			id: "Salesforce/blip2-opt-2.7b",
		},
		{
			description: "A powerful and accurate image-to-text model that can also localize concepts in images.",
			id: "microsoft/kosmos-2-patch14-224",
		},
		{
			description: "A strong optical character recognition model.",
			id: "facebook/nougat-base",
		},
		{
			description: "A powerful model that lets you have a conversation about a given image.",
			id: "llava-hf/llava-1.5-7b-hf",
		},
	],
	spaces: [
		{
			description: "An application that compares various image captioning models.",
			id: "nielsr/comparing-captioning-models",
		},
		{
			description: "A robust image captioning application.",
			id: "flax-community/image-captioning",
		},
		{
			description: "An application that transcribes handwriting into text.",
			id: "nielsr/TrOCR-handwritten",
		},
		{
			description: "An application that can caption images and answer questions about a given image.",
			id: "Salesforce/BLIP",
		},
		{
			description: "An application that can caption images and answer questions with a conversational agent.",
			id: "Salesforce/BLIP2",
		},
		{
			description: "An image captioning application that demonstrates the effect of noise on captions.",
			id: "johko/capdec-image-captioning",
		},
	],
	summary:
		"Image-to-text models output text from a given image. Image captioning and optical character recognition are among the most common applications of image-to-text.",
	widgetModels: ["Salesforce/blip-image-captioning-base"],
	youtubeId: "",
};

export default taskData;
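
// A minimal sketch of calling the widget model listed above through the
// Hugging Face Inference API. The `@huggingface/inference` client, the access
// token, and the image URL are assumptions for illustration only; they are not
// part of this data file.
//
//   import { HfInference } from "@huggingface/inference";
//
//   const hf = new HfInference("hf_xxx"); // hypothetical token
//   const result = await hf.imageToText({
//     model: "Salesforce/blip-image-captioning-base",
//     data: await (await fetch("https://example.com/savanna.jpg")).blob(), // hypothetical image
//   });
//   console.log(result.generated_text);
//   // e.g. "a herd of giraffes and zebras grazing in a field"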