Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,293 Bytes
b2ecf7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import type { TaskDataCustom } from "../Types";
/**
 * Static metadata for the image-to-text task.
 *
 * Bundles the example datasets, a demo input/output pair, the listed models
 * and Spaces, the task summary text, and the model ids used for the widget.
 * All `id` values are bare repository ids (`name` or `owner/name`) with no
 * route prefix such as `datasets/`, `models/` or `spaces/`.
 */
const taskData: TaskDataCustom = {
  // Example datasets for the task.
  datasets: [
    {
      // TODO write proper description
      description: "Dataset from 12M image-text of Reddit",
      id: "red_caps",
    },
    {
      // TODO write proper description
      description: "Dataset from 3.3M images of Google",
      // Bare id, consistent with `red_caps` above; the previous value carried
      // a stray `datasets/` prefix that no other id in this file uses.
      id: "conceptual_captions",
    },
  ],
  // Demo shown for the task: one input image and the caption produced for it.
  demo: {
    inputs: [
      {
        filename: "savanna.jpg",
        type: "img",
      },
    ],
    outputs: [
      {
        label: "Detailed description",
        content: "a herd of giraffes and zebras grazing in a field",
        type: "text",
      },
    ],
  },
  // No metrics are listed for this task yet.
  metrics: [],
  // Models listed for the task (captioning, OCR and pix2struct variants).
  models: [
    {
      description: "A robust image captioning model.",
      id: "Salesforce/blip-image-captioning-large",
    },
    {
      description: "A strong image captioning model.",
      id: "nlpconnect/vit-gpt2-image-captioning",
    },
    {
      description: "A strong optical character recognition model.",
      id: "microsoft/trocr-base-printed",
    },
    {
      description: "A strong visual question answering model for scientific diagrams.",
      id: "google/pix2struct-ai2d-base",
    },
    {
      description: "A strong captioning model for UI components.",
      id: "google/pix2struct-widget-captioning-base",
    },
    {
      description: "A captioning model for images that contain text.",
      id: "google/pix2struct-textcaps-base",
    },
  ],
  // Spaces (hosted applications) listed for the task.
  spaces: [
    {
      description: "A robust image captioning application.",
      id: "flax-community/image-captioning",
    },
    {
      description: "An application that transcribes handwritings into text.",
      id: "nielsr/TrOCR-handwritten",
    },
    {
      description: "An application that can caption images and answer questions about a given image.",
      id: "Salesforce/BLIP",
    },
    {
      description: "An application that can caption images and answer questions with a conversational agent.",
      id: "Salesforce/BLIP2",
    },
    {
      description: "An image captioning application that demonstrates the effect of noise on captions.",
      id: "johko/capdec-image-captioning",
    },
  ],
  summary:
    "Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
  widgetModels: ["Salesforce/blip-image-captioning-base"],
  // No video is associated with this task (empty id).
  youtubeId: "",
};

export default taskData;
|