Spaces:

huggingfacejs
/

inference-widgets

Running on CPU Upgrade

App Files Files Community

machineuser committed on Jan 26, 2024

Commit

afa4e5a

1 Parent(s): 5852013

Sync widgets demo

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

packages/tasks/package.json +7 -3
packages/tasks/pnpm-lock.yaml +209 -0
packages/tasks/src/scripts/inference-codegen.ts +192 -0
packages/tasks/src/tasks/audio-classification/inference.ts +51 -0
packages/tasks/src/tasks/audio-classification/spec/input.json +34 -0
packages/tasks/src/tasks/audio-classification/spec/output.json +21 -0
packages/tasks/src/tasks/automatic-speech-recognition/inference.ts +154 -0
packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
packages/tasks/src/tasks/common-definitions.json +109 -0
packages/tasks/src/tasks/depth-estimation/inference.ts +35 -0
packages/tasks/src/tasks/depth-estimation/spec/input.json +30 -0
packages/tasks/src/tasks/depth-estimation/spec/output.json +10 -0
packages/tasks/src/tasks/document-question-answering/inference.ts +102 -0
packages/tasks/src/tasks/document-question-answering/spec/input.json +85 -0
packages/tasks/src/tasks/document-question-answering/spec/output.json +36 -0
packages/tasks/src/tasks/feature-extraction/inference.ts +22 -0
packages/tasks/src/tasks/feature-extraction/spec/input.json +26 -0
packages/tasks/src/tasks/feature-extraction/spec/output.json +7 -0
packages/tasks/src/tasks/fill-mask/inference.ts +61 -0
packages/tasks/src/tasks/fill-mask/spec/input.json +38 -0
packages/tasks/src/tasks/fill-mask/spec/output.json +29 -0
packages/tasks/src/tasks/image-classification/inference.ts +51 -0
packages/tasks/src/tasks/image-classification/spec/input.json +34 -0
packages/tasks/src/tasks/image-classification/spec/output.json +10 -0
packages/tasks/src/tasks/image-segmentation/inference.ts +65 -0
packages/tasks/src/tasks/image-segmentation/spec/input.json +54 -0
packages/tasks/src/tasks/image-segmentation/spec/output.json +25 -0
packages/tasks/src/tasks/image-to-image/inference.ts +67 -0
packages/tasks/src/tasks/image-to-image/spec/input.json +52 -0
packages/tasks/src/tasks/image-to-image/spec/output.json +12 -0
packages/tasks/src/tasks/image-to-text/inference.ts +138 -0
packages/tasks/src/tasks/image-to-text/spec/input.json +34 -0
packages/tasks/src/tasks/image-to-text/spec/output.json +17 -0
packages/tasks/src/tasks/index.ts +1 -0
packages/tasks/src/tasks/object-detection/inference.ts +62 -0
packages/tasks/src/tasks/object-detection/spec/input.json +30 -0
packages/tasks/src/tasks/object-detection/spec/output.json +46 -0
packages/tasks/src/tasks/placeholder/data.ts +3 -0
packages/tasks/src/tasks/placeholder/spec/input.json +35 -0
packages/tasks/src/tasks/placeholder/spec/output.json +17 -0
packages/tasks/src/tasks/question-answering/inference.ts +99 -0
packages/tasks/src/tasks/question-answering/spec/input.json +67 -0
packages/tasks/src/tasks/question-answering/spec/output.json +29 -0
packages/tasks/src/tasks/sentence-similarity/inference.ts +32 -0
packages/tasks/src/tasks/sentence-similarity/spec/input.json +40 -0
packages/tasks/src/tasks/sentence-similarity/spec/output.json +12 -0
packages/tasks/src/tasks/summarization/data.ts +1 -0
packages/tasks/src/tasks/summarization/inference.ts +58 -0
packages/tasks/src/tasks/summarization/spec/input.json +7 -0

packages/tasks/package.json CHANGED Viewed

@@ -24,9 +24,10 @@
 		"format": "prettier --write .",
 		"format:check": "prettier --check .",
 		"prepublishOnly": "pnpm run build",
-		"build": "tsup src/index.ts --format cjs,esm --clean --dts",
 		"prepare": "pnpm run build",
-		"check": "tsc"
 	},
 	"files": [
 		"dist",
@@ -40,5 +41,8 @@
 	],
 	"author": "Hugging Face",
 	"license": "MIT",
-	"devDependencies": {}
 }

 		"format": "prettier --write .",
 		"format:check": "prettier --check .",
 		"prepublishOnly": "pnpm run build",
+		"build": "tsup src/index.ts src/scripts/**.ts --format cjs,esm --clean --dts",
 		"prepare": "pnpm run build",
+		"check": "tsc",
+		"inference-codegen": "pnpm run build && node dist/scripts/inference-codegen.js"
 	},
 	"files": [
 		"dist",
 	],
 	"author": "Hugging Face",
 	"license": "MIT",
+	"devDependencies": {
+		"@types/node": "^20.11.5",
+		"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz"
+	}
 }

packages/tasks/pnpm-lock.yaml CHANGED Viewed

@@ -3,3 +3,212 @@ lockfileVersion: '6.0'
 settings:
   autoInstallPeers: true
   excludeLinksFromLockfile: false

 settings:
   autoInstallPeers: true
   excludeLinksFromLockfile: false
+devDependencies:
+  '@types/node':
+    specifier: ^20.11.5
+    version: 20.11.5
+  quicktype-core:
+    specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz
+    version: '@github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz'
+packages:
+  /@glideapps/[email protected]:
+    resolution: {integrity: sha512-q9U8v/n9qbkd2zDYjuX3qtlbl+OIyI9zF+zQhZjfYOE9VMDH7tfcUSJ9p0lXoY3lxmGFne09yi4iiNeQUwV7AA==}
+    dev: true
+  /@types/[email protected]:
+    resolution: {integrity: sha512-g557vgQjUUfN76MZAN/dt1z3dzcUsimuysco0KeluHgrPdJXkP/XdAURgyO2W9fZWHRtRBiVKzKn8vyOAwlG+w==}
+    dependencies:
+      undici-types: 5.26.5
+    dev: true
+  /@types/[email protected]:
+    resolution: {integrity: sha512-XOfUup9r3Y06nFAZh3WvO0rBU4OtlfPB/vgxpjg+NRdGU6CN6djdc6OEiH+PcqHCY6eFLo9Ista73uarf4gnBg==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
+    engines: {node: '>=6.5'}
+    dependencies:
+      event-target-shim: 5.0.1
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-8CVjaLJGuSKMVTxJ2DpBl5XnlNDiT4cQFeuCJJrvJmts9YrTZDizTX7PjC2s6W4x+MBGZeEY6dGMrF04/6Hgqg==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==}
+    dependencies:
+      base64-js: 1.5.1
+      ieee754: 1.2.1
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-LA2YTIlR7biSpXkKYwwuzGjwL5rjWEZVOSnvdUc7gObvWe4WkjxOpfrdhoP7Hs09YWDVfg0Mal9BpAqLfVEzQg==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==}
+    dependencies:
+      node-fetch: 2.7.0
+    transitivePeerDependencies:
+      - encoding
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
+    engines: {node: '>=6'}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==}
+    engines: {node: '>=0.8.x'}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-NPrWuHFxFUknr1KqJRDgUQPexQF0uIJWjeT+2KjEePhitQxQEx5EJBG1lVn5/hc8aLycTpXrDOgPQ6Zq+EDiTA==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
+    engines: {node: 4.x || >=6.0.0}
+    peerDependencies:
+      encoding: ^0.1.0
+    peerDependenciesMeta:
+      encoding:
+        optional: true
+    dependencies:
+      whatwg-url: 5.0.0
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==}
+    engines: {node: '>=4'}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
+    engines: {node: '>= 0.6.0'}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-Lk/fICSyIhodxy1IDK2HazkeGjSmezAWX2egdtJnYhtzKEsBPJowlI6F6LPb5tqIQILrMbx22S5o3GuJavPusA==}
+    engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
+    dependencies:
+      abort-controller: 3.0.0
+      buffer: 6.0.3
+      events: 3.3.0
+      process: 0.11.10
+      string_decoder: 1.3.0
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==}
+    dependencies:
+      safe-buffer: 5.2.1
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-pkY1fj1cKHb2seWDy0B16HeWyczlJA9/WW3u3c4z/NiWDsO3DOU5D7nhTLE9CF0yXv/QZFY7sEJmj24dK+Rrqw==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-CLjCCLQ6UuMxWnbIylkisbRj31qxHPAurvena/0iwSVbQ2G1VY5/HjV0IRabOEbDHlzZlRdCrD4NhB0JtU40Pg==}
+    dependencies:
+      base64-js: 1.5.1
+      unicode-trie: 2.0.0
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-x7bc76x0bm4prf1VLg79uhAzKw8DVboClSN5VxJuQ+LKDOVEW9CdH+VY7SP+vX7xCYQqzzgQpFqz15zeLvAtZQ==}
+    dependencies:
+      pako: 0.2.9
+      tiny-inflate: 1.0.3
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
+    dependencies:
+      tr46: 0.0.3
+      webidl-conversions: 3.0.1
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==}
+    dev: true
+  /[email protected]:
+    resolution: {integrity: sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==}
+    engines: {node: '>= 14'}
+    dev: true
+  '@github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz':
+    resolution: {tarball: https://github.com/huggingface/quicktype/raw/pack-18.0.15/packages/quicktype-core/quicktype-core-18.0.15.tgz}
+    name: quicktype-core
+    version: 18.0.15
+    dependencies:
+      '@glideapps/ts-necessities': 2.1.3
+      '@types/urijs': 1.19.25
+      browser-or-node: 2.1.1
+      collection-utils: 1.0.1
+      cross-fetch: 4.0.0
+      is-url: 1.2.4
+      js-base64: 3.7.6
+      lodash: 4.17.21
+      pako: 1.0.11
+      pluralize: 8.0.0
+      readable-stream: 4.4.2
+      unicode-properties: 1.4.1
+      urijs: 1.19.11
+      wordwrap: 1.0.0
+      yaml: 2.3.4
+    transitivePeerDependencies:
+      - encoding
+    dev: true

packages/tasks/src/scripts/inference-codegen.ts ADDED Viewed

	@@ -0,0 +1,192 @@

+import type { SerializedRenderResult } from "quicktype-core";
+import { quicktype, InputData, JSONSchemaInput, FetchingJSONSchemaStore } from "quicktype-core";
+import * as fs from "fs/promises";
+import { existsSync as pathExists } from "fs";
+import * as path from "path";
+import * as ts from "typescript";
+/** Banner comment that `main` prepends to the generated lines of every `inference.ts` file. */ const TYPESCRIPT_HEADER_FILE = `
+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+`;
+/**
+ * Locates the root directory of the package this script belongs to.
+ *
+ * Walks up the directory tree and returns the first directory that contains a `package.json`.
+ * The walk relies on `path.dirname` instead of splitting on "/", so it also works with
+ * Windows-style paths, and a match at the filesystem root is returned as the root itself.
+ *
+ * @returns The normalized path of the closest ancestor directory containing a `package.json`,
+ * or an empty string when no ancestor contains one.
+ */
+const rootDirFinder = function (): string {
+	// `__dirname` itself is not checked: the search starts at its parent directory.
+	let currentPath = path.dirname(__dirname);
+	for (;;) {
+		if (pathExists(path.join(currentPath, "package.json"))) {
+			return path.normalize(currentPath);
+		}
+		const parentPath = path.dirname(currentPath);
+		if (parentPath === currentPath) {
+			// `path.dirname` is a fixed point at the filesystem root: nothing was found.
+			return "";
+		}
+		currentPath = parentPath;
+	}
+};
+/**
+ * Builds the quicktype input for a single task from its `input.json` and `output.json` schemas.
+ *
+ * @param taskId The ID of the task for which we are generating code
+ * @param taskSpecDir The path to the directory where the input.json & output.json files are
+ * @param allSpecFiles An array of paths to all the tasks specs. Allows resolving cross-file references ($ref).
+ * @returns An `InputData` holding one JSON schema input with two sources, `<taskId>-input` and `<taskId>-output`
+ */
+async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles: string[]): Promise<InputData> {
+	const schemaInput = new JSONSchemaInput(new FetchingJSONSchemaStore(), [], allSpecFiles);
+	// Sources are registered sequentially: the input spec first, then the output spec.
+	for (const kind of ["input", "output"]) {
+		const rawSchema = await fs.readFile(`${taskSpecDir}/${kind}.json`, { encoding: "utf-8" });
+		await schemaInput.addSource({ name: `${taskId}-${kind}`, schema: rawSchema });
+	}
+	const result = new InputData();
+	result.addInput(schemaInput);
+	return result;
+}
+/**
+ * Runs quicktype on the given input data with the TypeScript target.
+ *
+ * @param inputData The schemas to generate code from
+ * @returns The serialized render result produced by quicktype
+ */
+async function generateTypescript(inputData: InputData): Promise<SerializedRenderResult> {
+	const rendered = await quicktype({
+		inputData,
+		lang: "typescript",
+		alphabetizeProperties: true,
+		rendererOptions: {
+			"just-types": true,
+			"nice-property-names": true,
+			"prefer-unions": true,
+			"prefer-const-values": true,
+			"prefer-unknown": true,
+			"explicit-unions": true,
+		},
+	});
+	return rendered;
+}
+/**
+ * quicktype is unable to generate "top-level array types" that are defined in the output spec: https://github.com/glideapps/quicktype/issues/2481
+ * We have to use the TypeScript API to generate those types when required.
+ * This hacky function:
+ *   - looks for the generated interface for output types
+ *   - renames it with an `Element` suffix
+ *   - generates a type alias in the form `export type <OutputType> = <OutputType>Element[];`
+ *
+ * And writes that to the `inference.ts` file.
+ *
+ * Nothing is written when the output spec is not a titled top-level array, when the alias already
+ * exists in the generated file, or when no interface named after the spec title is found.
+ *
+ * @param path2generated Path to the generated `inference.ts` file; rewritten in place when patching is needed
+ * @param outputSpec The parsed content of the task's `output.json` spec
+ */
+async function postProcessOutput(path2generated: string, outputSpec: Record<string, unknown>): Promise<void> {
+	const exportedName = outputSpec.title;
+	// Inspect the spec first: the generated file is only read when the spec is a titled top-level array.
+	if (outputSpec.type !== "array" || typeof exportedName !== "string") {
+		console.log("      Nothing to do");
+		return;
+	}
+	const source = ts.createSourceFile(
+		path.basename(path2generated),
+		await fs.readFile(path2generated, { encoding: "utf-8" }),
+		ts.ScriptTarget.ES2022
+	);
+	const topLevelNodes = source.statements;
+	const hasTypeAlias = topLevelNodes.some(
+		(node) => ts.isTypeAliasDeclaration(node) && node.name.text === exportedName
+	);
+	if (hasTypeAlias) {
+		// The expected alias already exists: leave the file untouched.
+		return;
+	}
+	const interfaceDeclaration = topLevelNodes.find(
+		(node): node is ts.InterfaceDeclaration => ts.isInterfaceDeclaration(node) && node.name.text === exportedName
+	);
+	if (!interfaceDeclaration) {
+		console.log("      Nothing to do");
+		return;
+	}
+	console.log("      Inserting top-level array type alias...");
+	// Same interface, renamed to `<OutputType>Element`
+	const updatedInterface = ts.factory.updateInterfaceDeclaration(
+		interfaceDeclaration,
+		interfaceDeclaration.modifiers,
+		ts.factory.createIdentifier(`${exportedName}Element`),
+		interfaceDeclaration.typeParameters,
+		interfaceDeclaration.heritageClauses,
+		interfaceDeclaration.members
+	);
+	// `export type <OutputType> = <OutputType>Element[];`
+	const arrayDeclaration = ts.factory.createTypeAliasDeclaration(
+		[ts.factory.createModifier(ts.SyntaxKind.ExportKeyword)],
+		exportedName,
+		undefined,
+		ts.factory.createArrayTypeNode(ts.factory.createTypeReferenceNode(updatedInterface.name))
+	);
+	const printer = ts.createPrinter();
+	// The original interface is dropped; the alias and the renamed interface are appended at the end of the file.
+	const newNodes = ts.factory.createNodeArray([
+		...topLevelNodes.filter((node) => node !== interfaceDeclaration),
+		arrayDeclaration,
+		updatedInterface,
+	]);
+	await fs.writeFile(path2generated, printer.printList(ts.ListFormat.MultiLine, newNodes, source), {
+		flag: "w+",
+		encoding: "utf-8",
+	});
+}
+/**
+ * Entry point: regenerates `inference.ts` for every task that ships an input and an output spec.
+ *
+ * For each sub-directory of `src/tasks` (except `placeholder`) that contains both `spec/input.json`
+ * and `spec/output.json`, this generates the TypeScript types with quicktype, writes them to
+ * `<task>/inference.ts` (prefixed with the generated-code banner), then post-processes that file.
+ * Tasks without both spec files are skipped.
+ */
+async function main() {
+	const rootDir = rootDirFinder();
+	const tasksDir = path.join(rootDir, "src", "tasks");
+	const taskEntries = await fs.readdir(tasksDir, { withFileTypes: true });
+	// The listing is not recursive, so every entry lives directly under `tasksDir`. Building the path from
+	// `tasksDir` avoids `Dirent.path`, which is missing on older Node.js releases and deprecated on recent ones.
+	const allTasks = taskEntries
+		.filter((entry) => entry.isDirectory() && entry.name !== "placeholder")
+		.map((entry) => ({ task: entry.name, dirPath: path.join(tasksDir, entry.name) }));
+	// Every existing spec file is handed to quicktype so that cross-file `$ref`s can be resolved.
+	const allSpecFiles = [
+		path.join(tasksDir, "common-definitions.json"),
+		...allTasks
+			.flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
+			.filter((filepath) => pathExists(filepath)),
+	];
+	for (const { task, dirPath } of allTasks) {
+		const taskSpecDir = path.join(dirPath, "spec");
+		const outputSpecPath = path.join(taskSpecDir, "output.json");
+		if (!(pathExists(path.join(taskSpecDir, "input.json")) && pathExists(outputSpecPath))) {
+			console.debug(`No spec found for task ${task} - skipping`);
+			continue;
+		}
+		console.debug(`✨ Generating types for task`, task);
+		console.debug("   📦 Building input data");
+		const inputData = await buildInputData(task, taskSpecDir, allSpecFiles);
+		console.debug("   🏭 Generating typescript code");
+		const generatedPath = path.join(dirPath, "inference.ts");
+		const { lines } = await generateTypescript(inputData);
+		await fs.writeFile(generatedPath, [TYPESCRIPT_HEADER_FILE, ...lines].join(`\n`), {
+			flag: "w+",
+			encoding: "utf-8",
+		});
+		const outputSpec = JSON.parse(await fs.readFile(outputSpecPath, { encoding: "utf-8" }));
+		console.log("   🩹 Post-processing the generated code");
+		await postProcessOutput(generatedPath, outputSpec);
+	}
+	console.debug("✅ All done!");
+}
+// Process exit code: stays 0 unless `main` rejects.
+let exit = 0;
+const reportFailure = (err: unknown): void => {
+	console.error("Failure", err);
+	exit = 1;
+};
+// Always terminate explicitly, whether `main` succeeded or failed.
+main()
+	.catch(reportFailure)
+	.finally(() => process.exit(exit));

packages/tasks/src/tasks/audio-classification/inference.ts ADDED Viewed

	@@ -0,0 +1,51 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Audio Classification inference
+ */
+export interface AudioClassificationInput {
+	/**
+	 * The input audio data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: AudioClassificationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Audio Classification
+ */
+export interface AudioClassificationParameters {
+	functionToApply?: ClassificationOutputTransform;
+	/**
+	 * When specified, limits the output to the top K most probable classes.
+	 */
+	topK?: number;
+	[property: string]: unknown;
+}
+/**
+ * The function to apply to the model outputs in order to retrieve the scores.
+ */
+export type ClassificationOutputTransform = "sigmoid" | "softmax" | "none";
+export type AudioClassificationOutput = AudioClassificationOutputElement[];
+/**
+ * Outputs for Audio Classification inference
+ */
+export interface AudioClassificationOutputElement {
+	/**
+	 * The predicted class label (model specific).
+	 */
+	label: string;
+	/**
+	 * The corresponding probability.
+	 */
+	score: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/audio-classification/spec/input.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+	"$id": "/inference/schemas/audio-classification/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Audio Classification inference",
+	"title": "AudioClassificationInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input audio data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/AudioClassificationParameters"
+		}
+	},
+	"$defs": {
+		"AudioClassificationParameters": {
+			"title": "AudioClassificationParameters",
+			"description": "Additional inference parameters for Audio Classification",
+			"type": "object",
+			"properties": {
+				"functionToApply": {
+					"title": "AudioClassificationOutputTransform",
+					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
+				},
+				"topK": {
+					"type": "integer",
+					"description": "When specified, limits the output to the top K most probable classes."
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/audio-classification/spec/output.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+	"$id": "/inference/schemas/audio-classification/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"title": "AudioClassificationOutput",
+	"description": "Outputs for Audio Classification inference",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"label": {
+				"type": "string",
+				"description": "The predicted class label (model specific)."
+			},
+			"score": {
+				"type": "number",
+				"description": "The corresponding probability."
+			}
+		},
+		"required": ["label", "score"]
+	}
+}

packages/tasks/src/tasks/automatic-speech-recognition/inference.ts ADDED Viewed

	@@ -0,0 +1,154 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Automatic Speech Recognition inference
+ */
+export interface AutomaticSpeechRecognitionInput {
+	/**
+	 * The input audio data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: AutomaticSpeechRecognitionParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Automatic Speech Recognition
+ */
+export interface AutomaticSpeechRecognitionParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	/**
+	 * Whether to output corresponding timestamps with the generated text
+	 */
+	returnTimestamps?: boolean;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * Whether to use sampling instead of greedy decoding when generating new tokens.
+	 */
+	doSample?: boolean;
+	/**
+	 * Controls the stopping condition for beam-based methods.
+	 */
+	earlyStopping?: EarlyStoppingUnion;
+	/**
+	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
+	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+	 */
+	epsilonCutoff?: number;
+	/**
+	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+	 * float strictly between 0 and 1, a token is only considered if it is greater than either
+	 * eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+	 * term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+	 * the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+	 * for more details.
+	 */
+	etaCutoff?: number;
+	/**
+	 * The maximum length (in tokens) of the generated text, including the input.
+	 */
+	maxLength?: number;
+	/**
+	 * The maximum number of tokens to generate. Takes precedence over maxLength.
+	 */
+	maxNewTokens?: number;
+	/**
+	 * The minimum length (in tokens) of the generated text, including the input.
+	 */
+	minLength?: number;
+	/**
+	 * The minimum number of tokens to generate. Takes precedence over minLength.
+	 */
+	minNewTokens?: number;
+	/**
+	 * Number of groups to divide num_beams into in order to ensure diversity among different
+	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+	 */
+	numBeamGroups?: number;
+	/**
+	 * Number of beams to use for beam search.
+	 */
+	numBeams?: number;
+	/**
+	 * The value balances the model confidence and the degeneration penalty in contrastive
+	 * search decoding.
+	 */
+	penaltyAlpha?: number;
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
+	/**
+	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+	 */
+	topK?: number;
+	/**
+	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
+	 * that add up to top_p or higher are kept for generation.
+	 */
+	topP?: number;
+	/**
+	 * Local typicality measures how similar the conditional probability of predicting a target
+	 * token next is to the expected conditional probability of predicting a random token next,
+	 * given the partial text already generated. If set to float < 1, the smallest set of the
+	 * most locally typical tokens with probabilities that add up to typical_p or higher are
+	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+	 */
+	typicalP?: number;
+	/**
+	 * Whether the model should use the past last key/values attentions to speed up decoding
+	 */
+	useCache?: boolean;
+	[property: string]: unknown;
+}
+/**
+ * Controls the stopping condition for beam-based methods.
+ */
+export type EarlyStoppingUnion = boolean | "never";
+export interface AutomaticSpeechRecognitionOutputChunk {
+	/**
+	 * A chunk of text identified by the model
+	 */
+	text: string;
+	/**
+	 * The start and end timestamps corresponding with the text
+	 */
+	timestamps: number[];
+	[property: string]: unknown;
+}
+export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[];
+/**
+ * Outputs of inference for the Automatic Speech Recognition task
+ */
+export interface AutomaticSpeechRecognitionOutputElement {
+	/**
+	 * When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
+	 * the model.
+	 */
+	chunks?: AutomaticSpeechRecognitionOutputChunk[];
+	/**
+	 * The recognized text.
+	 */
+	text: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+	"$id": "/inference/schemas/automatic-speech-recognition/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Automatic Speech Recognition inference",
+	"title": "AutomaticSpeechRecognitionInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input audio data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/AutomaticSpeechRecognitionParameters"
+		}
+	},
+	"$defs": {
+		"AutomaticSpeechRecognitionParameters": {
+			"title": "AutomaticSpeechRecognitionParameters",
+			"description": "Additional inference parameters for Automatic Speech Recognition",
+			"type": "object",
+			"properties": {
+				"returnTimestamps": {
+					"type": "boolean",
+					"description": "Whether to output corresponding timestamps with the generated text"
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/common-definitions.json#/definitions/GenerationParameters"
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+	"$id": "/inference/schemas/automatic-speech-recognition/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Automatic Speech Recognition task",
+	"title": "AutomaticSpeechRecognitionOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"text": {
+				"type": "string",
+				"description": "The recognized text."
+			},
+			"chunks": {
+				"type": "array",
+				"description": "When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model.",
+				"items": {
+					"type": "object",
+					"title": "AutomaticSpeechRecognitionOutputChunk",
+					"properties": {
+						"text": { "type": "string", "description": "A chunk of text identified by the model" },
+						"timestamps": {
+							"type": "array",
+							"description": "The start and end timestamps corresponding with the text",
+							"items": { "type": "number" },
+							"minItems": 2,
+							"maxItems": 2
+						}
+					},
+					"required": ["text", "timestamps"]
+				}
+			}
+		},
+		"required": ["text"]
+	}
+}

packages/tasks/src/tasks/common-definitions.json ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+	"$id": "/inference/schemas/common-definitions.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "(Incomplete!) Common type definitions shared by several tasks",
+	"definitions": {
+		"ClassificationOutputTransform": {
+			"title": "ClassificationOutputTransform",
+			"type": "string",
+			"description": "The function to apply to the model outputs in order to retrieve the scores.",
+			"oneOf": [
+				{
+					"const": "sigmoid"
+				},
+				{
+					"const": "softmax"
+				},
+				{
+					"const": "none"
+				}
+			]
+		},
+		"ClassificationOutput": {
+			"title": "ClassificationOutput",
+			"type": "object",
+			"properties": {
+				"label": {
+					"type": "string",
+					"description": "The predicted class label."
+				},
+				"score": {
+					"type": "number",
+					"description": "The corresponding probability."
+				}
+			},
+			"required": ["label", "score"]
+		},
+		"GenerationParameters": {
+			"title": "GenerationParameters",
+			"description": "Ad-hoc parametrization of the text generation process",
+			"type": "object",
+			"properties": {
+				"temperature": {
+					"type": "number",
+					"description": "The value used to modulate the next token probabilities."
+				},
+				"topK": {
+					"type": "integer",
+					"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
+				},
+				"topP": {
+					"type": "number",
+					"description": "If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
+				},
+				"typicalP": {
+					"type": "number",
+					"description": " Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
+				},
+				"epsilonCutoff": {
+					"type": "number",
+					"description": "If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
+				},
+				"etaCutoff": {
+					"type": "number",
+					"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
+				},
+				"maxLength": {
+					"type": "integer",
+					"description": "The maximum length (in tokens) of the generated text, including the input."
+				},
+				"maxNewTokens": {
+					"type": "integer",
+					"description": "The maximum number of tokens to generate. Takes precedence over maxLength."
+				},
+				"minLength": {
+					"type": "integer",
+					"description": "The minimum length (in tokens) of the generated text, including the input."
+				},
+				"minNewTokens": {
+					"type": "integer",
+					"description": "The minimum number of tokens to generate. Takes precedence over minLength."
+				},
+				"doSample": {
+					"type": "boolean",
+					"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
+				},
+				"earlyStopping": {
+					"description": "Controls the stopping condition for beam-based methods.",
+					"oneOf": [{ "type": "boolean" }, { "const": "never", "type": "string" }]
+				},
+				"numBeams": {
+					"type": "integer",
+					"description": "Number of beams to use for beam search."
+				},
+				"numBeamGroups": {
+					"type": "integer",
+					"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
+				},
+				"penaltyAlpha": {
+					"type": "number",
+					"description": "The value balances the model confidence and the degeneration penalty in contrastive search decoding."
+				},
+				"useCache": {
+					"type": "boolean",
+					"description": "Whether the model should use the past last key/values attentions to speed up decoding"
+				}
+			}
+		}
+	}
+}

packages/tasks/src/tasks/depth-estimation/inference.ts ADDED Viewed

	@@ -0,0 +1,35 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+export type DepthEstimationOutput = unknown[];
+/**
+ * Inputs for Depth Estimation inference
+ */
+export interface DepthEstimationInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: DepthEstimationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Depth Estimation
+ */
+export interface DepthEstimationParameters {
+	/**
+	 * When specified, limits the output to the top K most probable classes.
+	 */
+	topK?: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/depth-estimation/spec/input.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+	"$id": "/inference/schemas/depth-estimation/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Depth Estimation inference",
+	"title": "DepthEstimationInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/DepthEstimationParameters"
+		}
+	},
+	"$defs": {
+		"DepthEstimationParameters": {
+			"title": "DepthEstimationParameters",
+			"description": "Additional inference parameters for Depth Estimation",
+			"type": "object",
+			"properties": {
+				"topK": {
+					"type": "integer",
+					"description": "When specified, limits the output to the top K most probable classes."
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/depth-estimation/spec/output.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+	"$id": "/inference/schemas/depth-estimation/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Depth Estimation task",
+	"title": "DepthEstimationOutput",
+	"type": "array",
+	"items": {
+		"description": "The output depth labels"
+	}
+}

packages/tasks/src/tasks/document-question-answering/inference.ts ADDED Viewed

	@@ -0,0 +1,102 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Document Question Answering inference
+ */
+export interface DocumentQuestionAnsweringInput {
+	/**
+	 * One (document, question) pair to answer
+	 */
+	data: DocumentQuestionAnsweringInputData;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: DocumentQuestionAnsweringParameters;
+	[property: string]: unknown;
+}
+/**
+ * One (document, question) pair to answer
+ */
+export interface DocumentQuestionAnsweringInputData {
+	/**
+	 * The image on which the question is asked
+	 */
+	image: unknown;
+	/**
+	 * A question to ask of the document
+	 */
+	question: string;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Document Question Answering
+ */
+export interface DocumentQuestionAnsweringParameters {
+	/**
+	 * If the words in the document are too long to fit with the question for the model, it will
+	 * be split in several chunks with some overlap. This argument controls the size of that
+	 * overlap.
+	 */
+	docStride?: number;
+	/**
+	 * Whether to accept impossible as an answer
+	 */
+	handleImpossibleAnswer?: boolean;
+	/**
+	 * Language to use while running OCR. Defaults to english.
+	 */
+	lang?: string;
+	/**
+	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
+	 * considered).
+	 */
+	maxAnswerLen?: number;
+	/**
+	 * The maximum length of the question after tokenization. It will be truncated if needed.
+	 */
+	maxQuestionLen?: number;
+	/**
+	 * The maximum length of the total sentence (context + question) in tokens of each chunk
+	 * passed to the model. The context will be split in several chunks (using doc_stride as
+	 * overlap) if needed.
+	 */
+	maxSeqLen?: number;
+	/**
+	 * The number of answers to return (will be chosen by order of likelihood). Can return less
+	 * than top_k answers if there are not enough options available within the context.
+	 */
+	topK?: number;
+	/**
+	 * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
+	 * skip the OCR step and use the provided bounding boxes instead.
+	 */
+	wordBoxes?: WordBox[];
+	[property: string]: unknown;
+}
+export type WordBox = number[] | string;
+export type DocumentQuestionAnsweringOutput = DocumentQuestionAnsweringOutputElement[];
+/**
+ * Outputs of inference for the Document Question Answering task
+ */
+export interface DocumentQuestionAnsweringOutputElement {
+	/**
+	 * The answer to the question.
+	 */
+	answer: string;
+	end: number;
+	/**
+	 * The probability associated to the answer.
+	 */
+	score: number;
+	start: number;
+	/**
+	 * The index of each word/box pair that is in the answer
+	 */
+	words: number[];
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/document-question-answering/spec/input.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+	"$id": "/inference/schemas/document-question-answering/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Document Question Answering inference",
+	"title": "DocumentQuestionAnsweringInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "One (document, question) pair to answer",
+			"type": "object",
+			"title": "DocumentQuestionAnsweringInputData",
+			"properties": {
+				"image": {
+					"description": "The image on which the question is asked"
+				},
+				"question": {
+					"type": "string",
+					"description": "A question to ask of the document"
+				}
+			},
+			"required": ["image", "question"]
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/DocumentQuestionAnsweringParameters"
+		}
+	},
+	"$defs": {
+		"DocumentQuestionAnsweringParameters": {
+			"title": "DocumentQuestionAnsweringParameters",
+			"description": "Additional inference parameters for Document Question Answering",
+			"type": "object",
+			"properties": {
+				"docStride": {
+					"type": "integer",
+					"description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
+				},
+				"handleImpossibleAnswer": {
+					"type": "boolean",
+					"description": "Whether to accept impossible as an answer"
+				},
+				"lang": {
+					"type": "string",
+					"description": "Language to use while running OCR. Defaults to english."
+				},
+				"maxAnswerLen": {
+					"type": "integer",
+					"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
+				},
+				"maxSeqLen": {
+					"type": "integer",
+					"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
+				},
+				"maxQuestionLen": {
+					"type": "integer",
+					"description": "The maximum length of the question after tokenization. It will be truncated if needed."
+				},
+				"topK": {
+					"type": "integer",
+					"description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
+				},
+				"wordBoxes": {
+					"type": "array",
+					"description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
+					"items": {
+						"anyOf": [
+							{
+								"type": "string"
+							},
+							{
+								"type": "array",
+								"items": {
+									"type": "number"
+								},
+								"maxLength": 4,
+								"minLength": 4
+							}
+						]
+					}
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/document-question-answering/spec/output.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+	"$id": "/inference/schemas/document-question-answering/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Document Question Answering task",
+	"title": "DocumentQuestionAnsweringOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"answer": {
+				"type": "string",
+				"description": "The answer to the question."
+			},
+			"score": {
+				"type": "number",
+				"description": "The probability associated to the answer."
+			},
+			"start": {
+				"type": "integer",
+				"description": "The start word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			},
+			"end": {
+				"type": "integer",
+				"description": "The end word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			},
+			"words": {
+				"type": "array",
+				"items": {
+					"type": "integer"
+				},
+				"description": "The index of each word/box pair that is in the answer"
+			}
+		},
+		"required": ["answer", "score", "start", "end", "words"]
+	}
+}

packages/tasks/src/tasks/feature-extraction/inference.ts ADDED Viewed

	@@ -0,0 +1,22 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+export type FeatureExtractionOutput = unknown[];
+/**
+ * Inputs for Text Embedding inference
+ */
+export interface FeatureExtractionInput {
+	/**
+	 * The text to get the embeddings of
+	 */
+	data: string;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: { [key: string]: unknown };
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/feature-extraction/spec/input.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+	"$id": "/inference/schemas/feature-extraction/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Text Embedding inference",
+	"title": "FeatureExtractionInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The text to get the embeddings of",
+			"type": "string"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/FeatureExtractionParameters"
+		}
+	},
+	"$defs": {
+		"FeatureExtractionParameters": {
+			"title": "FeatureExtractionParameters",
+			"description": "Additional inference parameters for Feature Extraction",
+			"type": "object",
+			"properties": {}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/feature-extraction/spec/output.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+	"$id": "/inference/schemas/feature-extraction/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "The embedding for the input text, as a nested list (tensor) of floats",
+	"type": "array",
+	"title": "FeatureExtractionOutput"
+}

packages/tasks/src/tasks/fill-mask/inference.ts ADDED Viewed

	@@ -0,0 +1,61 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Fill Mask inference
+ */
+export interface FillMaskInput {
+	/**
+	 * The text with masked tokens
+	 */
+	data: string;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: FillMaskParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Fill Mask
+ */
+export interface FillMaskParameters {
+	/**
+	 * When passed, the model will limit the scores to the passed targets instead of looking up
+	 * in the whole vocabulary. If the provided targets are not in the model vocab, they will be
+	 * tokenized and the first resulting token will be used (with a warning, and that might be
+	 * slower).
+	 */
+	targets?: string[];
+	/**
+	 * When passed, overrides the number of predictions to return.
+	 */
+	topK?: number;
+	[property: string]: unknown;
+}
+export type FillMaskOutput = FillMaskOutputElement[];
+/**
+ * Outputs of inference for the Fill Mask task
+ */
+export interface FillMaskOutputElement {
+	/**
+	 * The corresponding probability
+	 */
+	score: number;
+	/**
+	 * The corresponding input with the mask token prediction.
+	 */
+	sequence: string;
+	/**
+	 * The predicted token id (to replace the masked one).
+	 */
+	token: number;
+	/**
+	 * The predicted token (to replace the masked one).
+	 */
+	tokenStr: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/fill-mask/spec/input.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+	"$id": "/inference/schemas/fill-mask/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Fill Mask inference",
+	"title": "FillMaskInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The text with masked tokens",
+			"type": "string"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/FillMaskParameters"
+		}
+	},
+	"$defs": {
+		"FillMaskParameters": {
+			"title": "FillMaskParameters",
+			"description": "Additional inference parameters for Fill Mask",
+			"type": "object",
+			"properties": {
+				"topK": {
+					"type": "integer",
+					"description": "When passed, overrides the number of predictions to return."
+				},
+				"targets": {
+					"description": "When passed, the model will limit the scores to the passed targets instead of looking up in the whole vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first resulting token will be used (with a warning, and that might be slower).",
+					"type": "array",
+					"items": {
+						"type": "string"
+					}
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/fill-mask/spec/output.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+	"$id": "/inference/schemas/fill-mask/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Fill Mask task",
+	"title": "FillMaskOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"sequence": {
+				"type": "string",
+				"description": "The corresponding input with the mask token prediction."
+			},
+			"score": {
+				"type": "number",
+				"description": "The corresponding probability"
+			},
+			"token": {
+				"type": "integer",
+				"description": "The predicted token id (to replace the masked one)."
+			},
+			"tokenStr": {
+				"type": "string",
+				"description": "The predicted token (to replace the masked one)."
+			}
+		},
+		"required": ["sequence", "score", "token", "tokenStr"]
+	}
+}

packages/tasks/src/tasks/image-classification/inference.ts ADDED Viewed

	@@ -0,0 +1,51 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Image Classification inference
+ */
+export interface ImageClassificationInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: ImageClassificationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Image Classification
+ */
+export interface ImageClassificationParameters {
+	functionToApply?: ClassificationOutputTransform;
+	/**
+	 * When specified, limits the output to the top K most probable classes.
+	 */
+	topK?: number;
+	[property: string]: unknown;
+}
+/**
+ * The function to apply to the model outputs in order to retrieve the scores.
+ */
+export type ClassificationOutputTransform = "sigmoid" | "softmax" | "none";
+export type ImageClassificationOutput = ImageClassificationOutputElement[];
+/**
+ * Outputs of inference for the Image Classification task
+ */
+export interface ImageClassificationOutputElement {
+	/**
+	 * The predicted class label.
+	 */
+	label: string;
+	/**
+	 * The corresponding probability.
+	 */
+	score: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/image-classification/spec/input.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+	"$id": "/inference/schemas/image-classification/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Image Classification inference",
+	"title": "ImageClassificationInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/ImageClassificationParameters"
+		}
+	},
+	"$defs": {
+		"ImageClassificationParameters": {
+			"title": "ImageClassificationParameters",
+			"description": "Additional inference parameters for Image Classification",
+			"type": "object",
+			"properties": {
+				"functionToApply": {
+					"title": "ImageClassificationOutputTransform",
+					"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
+				},
+				"topK": {
+					"type": "integer",
+					"description": "When specified, limits the output to the top K most probable classes."
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/image-classification/spec/output.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+	"$id": "/inference/schemas/image-classification/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Image Classification task",
+	"title": "ImageClassificationOutput",
+	"type": "array",
+	"items": {
+		"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
+	}
+}

packages/tasks/src/tasks/image-segmentation/inference.ts ADDED Viewed

	@@ -0,0 +1,65 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Image Segmentation inference
+ */
+export interface ImageSegmentationInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: ImageSegmentationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Image Segmentation
+ */
+export interface ImageSegmentationParameters {
+	/**
+	 * Threshold to use when turning the predicted masks into binary values.
+	 */
+	maskThreshold?: number;
+	/**
+	 * Mask overlap threshold to eliminate small, disconnected segments.
+	 */
+	overlapMaskAreaThreshold?: number;
+	/**
+	 * Segmentation task to be performed, depending on model capabilities.
+	 */
+	subtask?: ImageSegmentationSubtask;
+	/**
+	 * Probability threshold to filter out predicted masks.
+	 */
+	threshold?: number;
+	[property: string]: unknown;
+}
+export type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic";
+export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
+/**
+ * Outputs of inference for the Image Segmentation task
+ *
+ * A predicted mask / segment
+ */
+export interface ImageSegmentationOutputElement {
+	/**
+	 * The label of the predicted segment
+	 */
+	label: string;
+	/**
+	 * The corresponding mask as a black-and-white image
+	 */
+	mask: unknown;
+	/**
+	 * The score or confidence degree the model has
+	 */
+	score?: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/image-segmentation/spec/input.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+	"$id": "/inference/schemas/image-segmentation/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Image Segmentation inference",
+	"title": "ImageSegmentationInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/ImageSegmentationParameters"
+		}
+	},
+	"$defs": {
+		"ImageSegmentationParameters": {
+			"title": "ImageSegmentationParameters",
+			"description": "Additional inference parameters for Image Segmentation",
+			"type": "object",
+			"properties": {
+				"maskThreshold": {
+					"type": "number",
+					"description": "Threshold to use when turning the predicted masks into binary values."
+				},
+				"overlapMaskAreaThreshold": {
+					"type": "number",
+					"description": "Mask overlap threshold to eliminate small, disconnected segments."
+				},
+				"subtask": {
+					"title": "ImageSegmentationSubtask",
+					"type": "string",
+					"description": "Segmentation task to be performed, depending on model capabilities.",
+					"oneOf": [
+						{
+							"const": "instance"
+						},
+						{
+							"const": "panoptic"
+						},
+						{
+							"const": "semantic"
+						}
+					]
+				},
+				"threshold": {
+					"type": "number",
+					"description": "Probability threshold to filter out predicted masks."
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/image-segmentation/spec/output.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+	"$id": "/inference/schemas/image-segmentation/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Image Segmentation task",
+	"title": "ImageSegmentationOutput",
+	"type": "array",
+	"items": {
+		"description": "A predicted mask / segment",
+		"type": "object",
+		"properties": {
+			"label": {
+				"type": "string",
+				"description": "The label of the predicted segment"
+			},
+			"mask": {
+				"description": "The corresponding mask as a black-and-white image"
+			},
+			"score": {
+				"type": "number",
+				"description": "The score or confidence degree the model has"
+			}
+		},
+		"required": ["label", "mask"]
+	}
+}

packages/tasks/src/tasks/image-to-image/inference.ts ADDED Viewed

	@@ -0,0 +1,67 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Image To Image inference
+ */
+export interface ImageToImageInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: ImageToImageParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Image To Image
+ */
+export interface ImageToImageParameters {
+	/**
+	 * For diffusion models. A higher guidance scale value encourages the model to generate
+	 * images closely linked to the text prompt at the expense of lower image quality.
+	 */
+	guidanceScale?: number;
+	/**
+	 * One or several prompts to guide what NOT to include in image generation.
+	 */
+	negativePrompt?: string[];
+	/**
+	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
+	 * a higher quality image at the expense of slower inference.
+	 */
+	numInferenceSteps?: number;
+	/**
+	 * The size in pixels of the output image
+	 */
+	targetSize?: TargetSize;
+	[property: string]: unknown;
+}
+/**
+ * The size in pixels of the output image
+ */
+export interface TargetSize {
+	height: number;
+	width: number;
+	[property: string]: unknown;
+}
+/**
+ * Outputs of inference for the Image To Image task
+ */
+export interface ImageToImageOutput {
+	/**
+	 * The output image
+	 */
+	image?: unknown;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/image-to-image/spec/input.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+	"$id": "/inference/schemas/image-to-image/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Image To Image inference",
+	"title": "ImageToImageInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/ImageToImageParameters"
+		}
+	},
+	"$defs": {
+		"ImageToImageParameters": {
+			"title": "ImageToImageParameters",
+			"description": "Additional inference parameters for Image To Image",
+			"type": "object",
+			"properties": {
+				"guidanceScale": {
+					"type": "number",
+					"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
+				},
+				"negativePrompt": {
+					"type": "array",
+					"items": { "type": "string" },
+					"description": "One or several prompts to guide what NOT to include in image generation."
+				},
+				"numInferenceSteps": {
+					"type": "integer",
+					"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
+				},
+				"targetSize": {
+					"type": "object",
+					"description": "The size in pixels of the output image",
+					"properties": {
+						"width": {
+							"type": "integer"
+						},
+						"height": {
+							"type": "integer"
+						}
+					},
+					"required": ["width", "height"]
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/image-to-image/spec/output.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"$id": "/inference/schemas/image-to-image/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Image To Image task",
+	"title": "ImageToImageOutput",
+	"type": "object",
+	"properties": {
+		"image": {
+			"description": "The output image"
+		}
+	}
+}

packages/tasks/src/tasks/image-to-text/inference.ts ADDED Viewed

	@@ -0,0 +1,138 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Image To Text inference
+ */
+export interface ImageToTextInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: ImageToTextParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Image To Text
+ */
+export interface ImageToTextParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	/**
+	 * The maximum number of tokens to generate.
+	 */
+	maxNewTokens?: number;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * Whether to use sampling instead of greedy decoding when generating new tokens.
+	 */
+	doSample?: boolean;
+	/**
+	 * Controls the stopping condition for beam-based methods.
+	 */
+	earlyStopping?: EarlyStoppingUnion;
+	/**
+	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
+	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+	 */
+	epsilonCutoff?: number;
+	/**
+	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+	 * float strictly between 0 and 1, a token is only considered if it is greater than either
+	 * eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+	 * term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+	 * the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+	 * for more details.
+	 */
+	etaCutoff?: number;
+	/**
+	 * The maximum length (in tokens) of the generated text, including the input.
+	 */
+	maxLength?: number;
+	/**
+	 * The maximum number of tokens to generate. Takes precedence over maxLength.
+	 */
+	maxNewTokens?: number;
+	/**
+	 * The minimum length (in tokens) of the generated text, including the input.
+	 */
+	minLength?: number;
+	/**
+	 * The minimum number of tokens to generate. Takes precedence over minLength.
+	 */
+	minNewTokens?: number;
+	/**
+	 * Number of groups to divide num_beams into in order to ensure diversity among different
+	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+	 */
+	numBeamGroups?: number;
+	/**
+	 * Number of beams to use for beam search.
+	 */
+	numBeams?: number;
+	/**
+	 * The value balances the model confidence and the degeneration penalty in contrastive
+	 * search decoding.
+	 */
+	penaltyAlpha?: number;
+	/**
+	 * The value used to modulate the next token probabilities.
+	 */
+	temperature?: number;
+	/**
+	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+	 */
+	topK?: number;
+	/**
+	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
+	 * that add up to top_p or higher are kept for generation.
+	 */
+	topP?: number;
+	/**
+	 * Local typicality measures how similar the conditional probability of predicting a target
+	 * token next is to the expected conditional probability of predicting a random token next,
+	 * given the partial text already generated. If set to float < 1, the smallest set of the
+	 * most locally typical tokens with probabilities that add up to typical_p or higher are
+	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+	 */
+	typicalP?: number;
+	/**
+	 * Whether the model should use the past last key/values attentions to speed up decoding
+	 */
+	useCache?: boolean;
+	[property: string]: unknown;
+}
+/**
+ * Controls the stopping condition for beam-based methods.
+ */
+export type EarlyStoppingUnion = boolean | "never";
+export type ImageToTextOutput = ImageToTextOutputElement[];
+/**
+ * Outputs of inference for the Image To Text task
+ */
+export interface ImageToTextOutputElement {
+	/**
+	 * The generated text.
+	 */
+	generatedText: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/image-to-text/spec/input.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+	"$id": "/inference/schemas/image-to-text/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Image To Text inference",
+	"title": "ImageToTextInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/ImageToTextParameters"
+		}
+	},
+	"$defs": {
+		"ImageToTextParameters": {
+			"title": "ImageToTextParameters",
+			"description": "Additional inference parameters for Image To Text",
+			"type": "object",
+			"properties": {
+				"maxNewTokens": {
+					"type": "integer",
+					"description": "The maximum number of tokens to generate."
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/common-definitions.json#/definitions/GenerationParameters"
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/image-to-text/spec/output.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+	"$id": "/inference/schemas/image-to-text/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Image To Text task",
+	"title": "ImageToTextOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"generatedText": {
+				"type": "string",
+				"description": "The generated text."
+			}
+		},
+		"required": ["generatedText"]
+	}
+}

packages/tasks/src/tasks/index.ts CHANGED Viewed

@@ -216,6 +216,7 @@ export interface TaskData {
 	datasets: ExampleRepo[];
 	demo: TaskDemo;
 	id: PipelineType;
 	isPlaceholder?: boolean;
 	label: string;
 	libraries: ModelLibraryKey[];

 	datasets: ExampleRepo[];
 	demo: TaskDemo;
 	id: PipelineType;
+	canonicalId?: PipelineType;
 	isPlaceholder?: boolean;
 	label: string;
 	libraries: ModelLibraryKey[];

packages/tasks/src/tasks/object-detection/inference.ts ADDED Viewed

	@@ -0,0 +1,62 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Object Detection inference
+ */
+export interface ObjectDetectionInput {
+	/**
+	 * The input image data
+	 */
+	data: unknown;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: ObjectDetectionParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Object Detection
+ */
+export interface ObjectDetectionParameters {
+	/**
+	 * The probability necessary to make a prediction.
+	 */
+	threshold?: number;
+	[property: string]: unknown;
+}
+/**
+ * The predicted bounding box. Coordinates are relative to the top left corner of the input
+ * image.
+ */
+export interface BoundingBox {
+	xmax: number;
+	xmin: number;
+	ymax: number;
+	ymin: number;
+	[property: string]: unknown;
+}
+export type ObjectDetectionOutput = ObjectDetectionOutputElement[];
+/**
+ * Outputs of inference for the Object Detection task
+ */
+export interface ObjectDetectionOutputElement {
+	/**
+	 * The predicted bounding box. Coordinates are relative to the top left corner of the input
+	 * image.
+	 */
+	box: BoundingBox;
+	/**
+	 * The predicted label for the bounding box
+	 */
+	label: string;
+	/**
+	 * The associated score / probability
+	 */
+	score: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/object-detection/spec/input.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+	"$id": "/inference/schemas/object-detection/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Object Detection inference",
+	"title": "ObjectDetectionInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "The input image data"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/ObjectDetectionParameters"
+		}
+	},
+	"$defs": {
+		"ObjectDetectionParameters": {
+			"title": "ObjectDetectionParameters",
+			"description": "Additional inference parameters for Object Detection",
+			"type": "object",
+			"properties": {
+				"threshold": {
+					"type": "number",
+					"description": "The probability necessary to make a prediction."
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/object-detection/spec/output.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+	"$id": "/inference/schemas/object-detection/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs of inference for the Object Detection task",
+	"title": "ObjectDetectionOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"label": {
+				"type": "string",
+				"description": "The predicted label for the bounding box"
+			},
+			"score": {
+				"type": "number",
+				"description": "The associated score / probability"
+			},
+			"box": {
+				"$ref": "#/$defs/BoundingBox",
+				"description": "The predicted bounding box. Coordinates are relative to the top left corner of the input image."
+			}
+		},
+		"required": ["box", "label", "score"]
+	},
+	"$defs": {
+		"BoundingBox": {
+			"type": "object",
+			"title": "BoundingBox",
+			"properties": {
+				"xmin": {
+					"type": "integer"
+				},
+				"xmax": {
+					"type": "integer"
+				},
+				"ymin": {
+					"type": "integer"
+				},
+				"ymax": {
+					"type": "integer"
+				}
+			},
+			"required": ["xmin", "xmax", "ymin", "ymax"]
+		}
+	}
+}

packages/tasks/src/tasks/placeholder/data.ts CHANGED Viewed

@@ -13,6 +13,9 @@ const taskData: TaskDataCustom = {
 	summary: "",
 	widgetModels: [],
 	youtubeId: undefined,
 };
 export default taskData;

 	summary: "",
 	widgetModels: [],
 	youtubeId: undefined,
+	/// If this is a subtask, link to the most general task ID
+	/// (e.g., text2text-generation is the canonical ID of translation)
+	canonicalId: undefined,
 };
 export default taskData;

packages/tasks/src/tasks/placeholder/spec/input.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+	"$id": "/inference/schemas/<TASK_ID>/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for <TASK_ID> inference",
+	"title": "PlaceholderInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"description": "TODO: describe the input here. This must be model & framework agnostic.",
+			"type": "string"
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/<TASK_ID>Parameters"
+		}
+	},
+	"$defs": {
+		"<TASK_ID>Parameters": {
+			"title": "<TASK_ID>Parameters",
+			"description": "TODO: describe additional parameters here.",
+			"type": "object",
+			"properties": {
+				"dummyParameterName": {
+					"type": "boolean",
+					"description": "TODO: describe the parameter here"
+				},
+				"dummyParameterName2": {
+					"type": "integer",
+					"description": "TODO: describe the parameter here"
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/placeholder/spec/output.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+	"$id": "/inference/schemas/<TASK_ID>/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs for <TASK_ID> inference",
+	"title": "PlaceholderOutput",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"meaningfulOutputName": {
+				"type": "string",
+				"description": "TODO: Describe what is output by the inference here"
+			}
+		},
+		"required": ["meaningfulOutputName"]
+	}
+}

packages/tasks/src/tasks/question-answering/inference.ts ADDED Viewed

	@@ -0,0 +1,99 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Question Answering inference
+ */
+export interface QuestionAnsweringInput {
+	/**
+	 * One (context, question) pair to answer
+	 */
+	data: QuestionAnsweringInputData;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: QuestionAnsweringParameters;
+	[property: string]: unknown;
+}
+/**
+ * One (context, question) pair to answer
+ */
+export interface QuestionAnsweringInputData {
+	/**
+	 * The context to be used for answering the question
+	 */
+	context: string;
+	/**
+	 * The question to be answered
+	 */
+	question: string;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Question Answering
+ */
+export interface QuestionAnsweringParameters {
+	/**
+	 * Attempts to align the answer to real words. Improves quality on space separated
+	 * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
+	 */
+	alignToWords?: boolean;
+	/**
+	 * If the context is too long to fit with the question for the model, it will be split in
+	 * several chunks with some overlap. This argument controls the size of that overlap.
+	 */
+	docStride?: number;
+	/**
+	 * Whether to accept impossible as an answer.
+	 */
+	handleImpossibleAnswer?: boolean;
+	/**
+	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
+	 * considered).
+	 */
+	maxAnswerLen?: number;
+	/**
+	 * The maximum length of the question after tokenization. It will be truncated if needed.
+	 */
+	maxQuestionLen?: number;
+	/**
+	 * The maximum length of the total sentence (context + question) in tokens of each chunk
+	 * passed to the model. The context will be split in several chunks (using docStride as
+	 * overlap) if needed.
+	 */
+	maxSeqLen?: number;
+	/**
+	 * The number of answers to return (will be chosen by order of likelihood). Note that we
+	 * return fewer than topK answers if there are not enough options available within the
+	 * context.
+	 */
+	topK?: number;
+	[property: string]: unknown;
+}
+export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
+/**
+ * Outputs of inference for the Question Answering task
+ */
+export interface QuestionAnsweringOutputElement {
+	/**
+	 * The answer to the question.
+	 */
+	answer: string;
+	/**
+	 * The character position in the input where the answer ends.
+	 */
+	end: number;
+	/**
+	 * The probability associated to the answer.
+	 */
+	score: number;
+	/**
+	 * The character position in the input where the answer begins.
+	 */
+	start: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/question-answering/spec/input.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+	"$id": "/inference/schemas/question-answering/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Question Answering inference",
+	"title": "QuestionAnsweringInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"title": "QuestionAnsweringInputData",
+			"description": "One (context, question) pair to answer",
+			"type": "object",
+			"properties": {
+				"context": {
+					"type": "string",
+					"description": "The context to be used for answering the question"
+				},
+				"question": {
+					"type": "string",
+					"description": "The question to be answered"
+				}
+			},
+			"required": ["question", "context"]
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/QuestionAnsweringParameters"
+		}
+	},
+	"$defs": {
+		"QuestionAnsweringParameters": {
+			"title": "QuestionAnsweringParameters",
+			"description": "Additional inference parameters for Question Answering",
+			"type": "object",
+			"properties": {
+				"topK": {
+					"type": "integer",
+					"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return fewer than topK answers if there are not enough options available within the context."
+				},
+				"docStride": {
+					"type": "integer",
+					"description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
+				},
+				"maxAnswerLen": {
+					"type": "integer",
+					"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
+				},
+				"maxSeqLen": {
+					"type": "integer",
+					"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
+				},
+				"maxQuestionLen": {
+					"type": "integer",
+					"description": "The maximum length of the question after tokenization. It will be truncated if needed."
+				},
+				"handleImpossibleAnswer": {
+					"type": "boolean",
+					"description": "Whether to accept impossible as an answer."
+				},
+				"alignToWords": {
+					"type": "boolean",
+					"description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
+				}
+			}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/question-answering/spec/output.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+	"$id": "/inference/schemas/question-answering/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"title": "QuestionAnsweringOutput",
+	"description": "Outputs of inference for the Question Answering task",
+	"type": "array",
+	"items": {
+		"type": "object",
+		"properties": {
+			"answer": {
+				"type": "string",
+				"description": "The answer to the question."
+			},
+			"score": {
+				"type": "number",
+				"description": "The probability associated to the answer."
+			},
+			"start": {
+				"type": "integer",
+				"description": "The character position in the input where the answer begins."
+			},
+			"end": {
+				"type": "integer",
+				"description": "The character position in the input where the answer ends."
+			}
+		},
+		"required": ["answer", "score", "start", "end"]
+	}
+}

packages/tasks/src/tasks/sentence-similarity/inference.ts ADDED Viewed

	@@ -0,0 +1,32 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+export type SentenceSimilarityOutput = number[];
+/**
+ * Inputs for Sentence Similarity inference
+ */
+export interface SentenceSimilarityInput {
+	data: SentenceSimilarityInputData;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: { [key: string]: unknown };
+	[property: string]: unknown;
+}
+export interface SentenceSimilarityInputData {
+	/**
+	 * A list of strings which will be compared against the sourceSentence.
+	 */
+	sentences: string[];
+	/**
+	 * The string that you wish to compare the other strings with. This can be a phrase,
+	 * sentence, or longer passage, depending on the model being used.
+	 */
+	sourceSentence: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/sentence-similarity/spec/input.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+	"$id": "/inference/schemas/sentence-similarity/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for Sentence Similarity inference",
+	"title": "SentenceSimilarityInput",
+	"type": "object",
+	"properties": {
+		"data": {
+			"title": "SentenceSimilarityInputData",
+			"type": "object",
+			"properties": {
+				"sourceSentence": {
+					"description": "The string that you wish to compare the other strings with. This can be a phrase, sentence, or longer passage, depending on the model being used.",
+					"type": "string"
+				},
+				"sentences": {
+					"type": "array",
+					"description": "A list of strings which will be compared against the sourceSentence.",
+					"items": {
+						"type": "string"
+					}
+				}
+			},
+			"required": ["sourceSentence", "sentences"]
+		},
+		"parameters": {
+			"description": "Additional inference parameters",
+			"$ref": "#/$defs/SentenceSimilarityParameters"
+		}
+	},
+	"$defs": {
+		"SentenceSimilarityParameters": {
+			"title": "SentenceSimilarityParameters",
+			"description": "Additional inference parameters for Sentence Similarity",
+			"type": "object",
+			"properties": {}
+		}
+	},
+	"required": ["data"]
+}

packages/tasks/src/tasks/sentence-similarity/spec/output.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"$id": "/inference/schemas/sentence-similarity/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"title": "SentenceSimilarityOutput",
+	"description": "Outputs of inference for the Sentence Similarity task",
+	"type": "array",
+	"items": {
+		"description": "The associated similarity score for each of the given sentences",
+		"type": "number",
+		"title": "SentenceSimilarityScore"
+	}
+}

packages/tasks/src/tasks/summarization/data.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { TaskDataCustom } from "..";
 const taskData: TaskDataCustom = {
 	datasets: [
 		{
 			description:

 import type { TaskDataCustom } from "..";
 const taskData: TaskDataCustom = {
+	canonicalId: "text2text-generation",
 	datasets: [
 		{
 			description:

packages/tasks/src/tasks/summarization/inference.ts ADDED Viewed

	@@ -0,0 +1,58 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+/**
+ * Inputs for Summarization inference
+ *
+ * Inputs for Text2text Generation inference
+ */
+export interface SummarizationInput {
+	/**
+	 * The input text data
+	 */
+	data: string;
+	/**
+	 * Additional inference parameters
+	 */
+	parameters?: Text2TextGenerationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text2text Generation
+ */
+export interface Text2TextGenerationParameters {
+	/**
+	 * Whether to clean up the potential extra spaces in the text output.
+	 */
+	cleanUpTokenizationSpaces?: boolean;
+	/**
+	 * Additional parametrization of the text generation algorithm
+	 */
+	generateParameters?: { [key: string]: unknown };
+	/**
+	 * The truncation strategy to use
+	 */
+	truncation?: Text2TextGenerationTruncationStrategy;
+	[property: string]: unknown;
+}
+export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
+/**
+ * Outputs for Summarization inference
+ *
+ * Outputs of inference for the Text2text Generation task
+ */
+export interface SummarizationOutput {
+	/**
+	 * The generated text.
+	 */
+	generatedText: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/summarization/spec/input.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+	"$ref": "/inference/schemas/text2text-generation/input.json",
+	"$id": "/inference/schemas/summarization/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"title": "SummarizationInput",
+	"description": "Inputs for Summarization inference"
+}