Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import { InferenceOutputError } from "../../lib/InferenceOutputError"; | |
| import type { BaseArgs, Options, RequestArgs } from "../../types"; | |
| import { request } from "../custom/request"; | |
| import { base64FromBytes } from "../../../../shared"; | |
| export type VisualQuestionAnsweringArgs = BaseArgs & { | |
| inputs: { | |
| /** | |
| * Raw image | |
| * | |
| * You can use native `File` in browsers, or `new Blob([buffer])` in node, or for a base64 image `new Blob([btoa(base64String)])`, or even `await (await fetch('...)).blob()` | |
| **/ | |
| image: Blob | ArrayBuffer; | |
| question: string; | |
| }; | |
| }; | |
| export interface VisualQuestionAnsweringOutput { | |
| /** | |
| * A string that’s the answer to a visual question. | |
| */ | |
| answer: string; | |
| /** | |
| * Answer correctness score. | |
| */ | |
| score: number; | |
| } | |
| /** | |
| * Answers a question on an image. Recommended model: dandelin/vilt-b32-finetuned-vqa. | |
| */ | |
| export async function visualQuestionAnswering( | |
| args: VisualQuestionAnsweringArgs, | |
| options?: Options | |
| ): Promise<VisualQuestionAnsweringOutput> { | |
| const reqArgs: RequestArgs = { | |
| ...args, | |
| inputs: { | |
| question: args.inputs.question, | |
| // convert Blob or ArrayBuffer to base64 | |
| image: base64FromBytes( | |
| new Uint8Array( | |
| args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer() | |
| ) | |
| ), | |
| }, | |
| } as RequestArgs; | |
| const res = ( | |
| await request<[VisualQuestionAnsweringOutput]>(reqArgs, { | |
| ...options, | |
| taskHint: "visual-question-answering", | |
| }) | |
| )?.[0]; | |
| const isValidOutput = typeof res?.answer === "string" && typeof res.score === "number"; | |
| if (!isValidOutput) { | |
| throw new InferenceOutputError("Expected Array<{answer: string, score: number}>"); | |
| } | |
| return res; | |
| } | |