Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,162 Bytes
b2ecf7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import type { TaskDataCustom } from "../Types";
/**
 * Static metadata for the Visual Question Answering task: example datasets,
 * a demo input/output pair, metrics, models, Spaces, a summary, and the
 * widget model.
 *
 * Every `id` must match the referenced repository id exactly (no leading or
 * trailing whitespace).
 */
const taskData: TaskDataCustom = {
  datasets: [
    {
      description: "A widely used dataset containing questions (with answers) about images.",
      id: "Graphcore/vqa",
    },
    {
      description: "A dataset to benchmark visual reasoning based on text in images.",
      id: "textvqa",
    },
  ],
  // Demo: an image plus a text question in, a chart of scored answer labels out.
  demo: {
    inputs: [
      {
        filename: "elephant.jpeg",
        type: "img",
      },
      {
        label: "Question",
        content: "What is in this image?",
        type: "text",
      },
    ],
    outputs: [
      {
        type: "chart",
        data: [
          {
            label: "elephant",
            score: 0.97,
          },
          {
            label: "elephants",
            score: 0.06,
          },
          {
            label: "animal",
            score: 0.003,
          },
        ],
      },
    ],
  },
  isPlaceholder: false,
  metrics: [
    {
      // NOTE(review): description is empty here — confirm whether that is intentional.
      description: "",
      id: "accuracy",
    },
    {
      description:
        "Measures how much a predicted answer differs from the ground truth based on the difference in their semantic meaning.",
      id: "wu-palmer similarity",
    },
  ],
  models: [
    {
      description: "A visual question answering model trained to convert charts and plots to text.",
      id: "google/deplot",
    },
    {
      description:
        "A visual question answering model trained for mathematical reasoning and chart derendering from images.",
      // Trailing space removed: the id previously read "google/matcha-base ".
      id: "google/matcha-base",
    },
    {
      description: "A strong visual question answering that answers questions from book covers.",
      id: "google/pix2struct-ocrvqa-large",
    },
  ],
  spaces: [
    {
      description: "An application that can answer questions based on images.",
      id: "nielsr/vilt-vqa",
    },
    {
      description: "An application that can caption images and answer questions about a given image. ",
      id: "Salesforce/BLIP",
    },
    {
      description: "An application that can caption images and answer questions about a given image. ",
      id: "vumichien/Img2Prompt",
    },
  ],
  summary:
    "Visual Question Answering is the task of answering open-ended questions based on an image. They output natural language responses to natural language questions.",
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: "",
};
export default taskData;
|