File size: 2,162 Bytes
b2ecf7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import type { TaskDataCustom } from "../Types";

/**
 * Static metadata for the Visual Question Answering task.
 *
 * The object lists example datasets, metrics, models and spaces for the task,
 * a demo made of one image input, one text question and a chart of scored
 * answer labels, plus a short summary and the model(s) used by the widget.
 *
 * Every `id` is written without leading or trailing whitespace.
 * NOTE(review): ids are presumably resolved as repository names by whatever
 * consumes this object, so stray whitespace would break the lookup; confirm
 * with the consumer.
 */
const taskData: TaskDataCustom = {
	datasets: [
		{
			description: "A widely used dataset containing questions (with answers) about images.",
			id: "Graphcore/vqa",
		},
		{
			description: "A dataset to benchmark visual reasoning based on text in images.",
			id: "textvqa",
		},
	],
	demo: {
		inputs: [
			{
				filename: "elephant.jpeg",
				type: "img",
			},
			{
				label: "Question",
				content: "What is in this image?",
				type: "text",
			},
		],
		outputs: [
			{
				type: "chart",
				data: [
					{
						label: "elephant",
						score: 0.97,
					},
					{
						label: "elephants",
						score: 0.06,
					},
					{
						label: "animal",
						score: 0.003,
					},
				],
			},
		],
	},
	isPlaceholder: false,
	metrics: [
		{
			// Intentionally left empty in the original data; no description is provided for accuracy.
			description: "",
			id: "accuracy",
		},
		{
			description:
				"Measures how much a predicted answer differs from the ground truth based on the difference in their semantic meaning.",
			id: "wu-palmer similarity",
		},
	],
	models: [
		{
			description: "A visual question answering model trained to convert charts and plots to text.",
			id: "google/deplot",
		},
		{
			description:
				"A visual question answering model trained for mathematical reasoning and chart derendering from images.",
			// Fixed: the id previously ended with a stray space ("google/matcha-base ").
			id: "google/matcha-base",
		},
		{
			description: "A strong visual question answering model that answers questions from book covers.",
			id: "google/pix2struct-ocrvqa-large",
		},
	],
	spaces: [
		{
			description: "An application that can answer questions based on images.",
			id: "nielsr/vilt-vqa",
		},
		{
			description: "An application that can caption images and answer questions about a given image.",
			id: "Salesforce/BLIP",
		},
		{
			description: "An application that can caption images and answer questions about a given image.",
			id: "vumichien/Img2Prompt",
		},
	],
	summary:
		"Visual Question Answering is the task of answering open-ended questions based on an image. They output natural language responses to natural language questions.",
	widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
	youtubeId: "",
};

export default taskData;