Spaces:

huggingfacejs
/

inference-widgets

Running on CPU Upgrade

App Files Files Community

machineuser committed on Mar 20, 2024

Commit

0f711ba

1 Parent(s): a1bb091

Sync widgets demo

Browse files

Files changed (10) hide show

packages/tasks/scripts/inference-codegen.ts +6 -0
packages/tasks/src/tasks/chat-completion/inference.ts +158 -0
packages/tasks/src/tasks/chat-completion/spec/input.json +63 -0
packages/tasks/src/tasks/chat-completion/spec/output.json +58 -0
packages/tasks/src/tasks/chat-completion/spec/output_stream.json +48 -0
packages/tasks/src/tasks/index.ts +15 -4
packages/tasks/src/tasks/text-generation/inference.ts +75 -19
packages/tasks/src/tasks/text-generation/spec/input.json +4 -0
packages/tasks/src/tasks/text-generation/spec/output.json +101 -56
packages/tasks/src/tasks/text-generation/spec/output_stream.json +47 -0

packages/tasks/scripts/inference-codegen.ts CHANGED Viewed

@@ -54,6 +54,12 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles:
 		name: `${taskId}-output`,
 		schema: await fs.readFile(`${taskSpecDir}/output.json`, { encoding: "utf-8" }),
 	});
 	const inputData = new InputData();
 	inputData.addInput(schema);
 	return inputData;

 		name: `${taskId}-output`,
 		schema: await fs.readFile(`${taskSpecDir}/output.json`, { encoding: "utf-8" }),
 	});
+	if (taskId === "text-generation" || taskId === "chat-completion") {
+		await schema.addSource({
+			name: `${taskId}-stream-output`,
+			schema: await fs.readFile(`${taskSpecDir}/output_stream.json`, { encoding: "utf-8" }),
+		});
+	}
 	const inputData = new InputData();
 	inputData.addInput(schema);
 	return inputData;

packages/tasks/src/tasks/chat-completion/inference.ts ADDED Viewed

	@@ -0,0 +1,158 @@

+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using scripts/inference-codegen
+ */
+/**
+ * Inputs for ChatCompletion inference
+ */
+export interface ChatCompletionInput {
+	/**
+	 * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+	 * frequency in the text so far, decreasing the model's likelihood to repeat the same line
+	 * verbatim.
+	 */
+	frequency_penalty?: number;
+	/**
+	 * The maximum number of tokens that can be generated in the chat completion.
+	 */
+	max_tokens?: number;
+	messages: ChatCompletionInputMessage[];
+	/**
+	 * The random sampling seed.
+	 */
+	seed?: number;
+	/**
+	 * Stop generating tokens if a stop token is generated.
+	 */
+	stop?: ChatCompletionInputStopReason;
+	/**
+	 * If set, partial message deltas will be sent.
+	 */
+	stream?: boolean;
+	/**
+	 * The value used to modulate the logits distribution.
+	 */
+	temperature?: number;
+	/**
+	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+	 * up to `top_p` or higher are kept for generation.
+	 */
+	top_p?: number;
+	[property: string]: unknown;
+}
+export interface ChatCompletionInputMessage {
+	/**
+	 * The content of the message.
+	 */
+	content: string;
+	role: ChatCompletionMessageRole;
+	[property: string]: unknown;
+}
+/**
+ * The role of the message author.
+ */
+export type ChatCompletionMessageRole = "assistant" | "system" | "user";
+/**
+ * Stop generating tokens if a stop token is generated.
+ */
+export type ChatCompletionInputStopReason = string[] | string;
+/**
+ * Outputs for Chat Completion inference
+ */
+export interface ChatCompletionOutput {
+	/**
+	 * A list of chat completion choices.
+	 */
+	choices: ChatCompletionOutputChoice[];
+	/**
+	 * The Unix timestamp (in seconds) of when the chat completion was created.
+	 */
+	created: number;
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputChoice {
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason: ChatCompletionFinishReason;
+	/**
+	 * The index of the choice in the list of choices.
+	 */
+	index: number;
+	message: ChatCompletionOutputChoiceMessage;
+	[property: string]: unknown;
+}
+/**
+ * The reason why the generation was stopped.
+ *
+ * The generated sequence reached the maximum allowed length
+ *
+ * The model generated an end-of-sequence (EOS) token
+ *
+ * One of the sequences in stop_sequences was generated
+ */
+export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
+export interface ChatCompletionOutputChoiceMessage {
+	/**
+	 * The content of the chat completion message.
+	 */
+	content: string;
+	role: ChatCompletionMessageRole;
+	[property: string]: unknown;
+}
+/**
+ * Chat Completion Stream Output
+ */
+export interface ChatCompletionStreamOutput {
+	/**
+	 * A list of chat completion choices.
+	 */
+	choices: ChatCompletionStreamOutputChoice[];
+	/**
+	 * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+	 * the same timestamp.
+	 */
+	created: number;
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputChoice {
+	/**
+	 * A chat completion delta generated by streamed model responses.
+	 */
+	delta: ChatCompletionStreamOutputDelta;
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason?: ChatCompletionFinishReason;
+	/**
+	 * The index of the choice in the list of choices.
+	 */
+	index: number;
+	[property: string]: unknown;
+}
+/**
+ * A chat completion delta generated by streamed model responses.
+ */
+export interface ChatCompletionStreamOutputDelta {
+	/**
+	 * The contents of the chunk message.
+	 */
+	content?: string;
+	/**
+	 * The role of the author of this message.
+	 */
+	role?: string;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/chat-completion/spec/input.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+	"title": "ChatCompletionInput",
+	"$id": "/inference/schemas/chat-completion/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for ChatCompletion inference",
+	"type": "object",
+	"properties": {
+		"messages": {
+			"type": "array",
+			"title": "ChatCompletionInputMessage",
+			"items": {
+				"type": "object",
+				"properties": {
+					"role": {
+						"$ref": "#/definitions/Role"
+					},
+					"content": {
+						"type": "string",
+						"description": "The content of the message."
+					}
+				},
+				"required": ["role", "content"]
+			}
+		},
+		"frequency_penalty": {
+			"type": "number",
+			"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
+		},
+		"max_tokens": {
+			"type": "integer",
+			"description": "The maximum number of tokens that can be generated in the chat completion."
+		},
+		"seed": {
+			"type": "integer",
+			"description": "The random sampling seed."
+		},
+		"stop": {
+			"oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
+			"title": "ChatCompletionInputStopReason",
+			"description": "Stop generating tokens if a stop token is generated."
+		},
+		"stream": {
+			"type": "boolean",
+			"description": "If set, partial message deltas will be sent."
+		},
+		"temperature": {
+			"type": "number",
+			"description": "The value used to modulate the logits distribution."
+		},
+		"top_p": {
+			"type": "number",
+			"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
+		}
+	},
+	"required": ["messages"],
+	"definitions": {
+		"Role": {
+			"oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
+			"title": "ChatCompletionMessageRole",
+			"description": "The role of the message author."
+		}
+	}
+}

packages/tasks/src/tasks/chat-completion/spec/output.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+	"$id": "/inference/schemas/chat-completion/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs for Chat Completion inference",
+	"title": "ChatCompletionOutput",
+	"type": "object",
+	"properties": {
+		"choices": {
+			"type": "array",
+			"description": "A list of chat completion choices.",
+			"title": "ChatCompletionOutputChoice",
+			"items": {
+				"type": "object",
+				"properties": {
+					"finish_reason": {
+						"$ref": "#/definitions/FinishReason",
+						"description": "The reason why the generation was stopped."
+					},
+					"index": {
+						"type": "integer",
+						"description": "The index of the choice in the list of choices."
+					},
+					"message": {
+						"type": "object",
+						"properties": {
+							"role": {
+								"$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
+							},
+							"content": {
+								"type": "string",
+								"description": "The content of the chat completion message."
+							}
+						},
+						"title": "ChatCompletionOutputChoiceMessage",
+						"required": ["content", "role"]
+					}
+				},
+				"required": ["finish_reason", "index", "message"]
+			}
+		},
+		"created": {
+			"type": "integer",
+			"description": "The Unix timestamp (in seconds) of when the chat completion was created."
+		}
+	},
+	"required": ["choices", "created"],
+	"definitions": {
+		"FinishReason": {
+			"type": "string",
+			"title": "ChatCompletionFinishReason",
+			"oneOf": [
+				{ "const": "length", "description": "The generated sequence reached the maximum allowed length" },
+				{ "const": "eos_token", "description": "The model generated an end-of-sequence (EOS) token" },
+				{ "const": "stop_sequence", "description": "One of the sequences in stop_sequences was generated" }
+			]
+		}
+	}
+}

packages/tasks/src/tasks/chat-completion/spec/output_stream.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+	"$id": "/inference/schemas/chat-completion/output_stream.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Chat Completion Stream Output",
+	"title": "ChatCompletionStreamOutput",
+	"type": "object",
+	"properties": {
+		"choices": {
+			"type": "array",
+			"title": "ChatCompletionStreamOutputChoice",
+			"description": "A list of chat completion choices.",
+			"items": {
+				"type": "object",
+				"properties": {
+					"delta": {
+						"type": "object",
+						"title": "ChatCompletionStreamOutputDelta",
+						"description": "A chat completion delta generated by streamed model responses.",
+						"properties": {
+							"content": {
+								"type": "string",
+								"description": "The contents of the chunk message."
+							},
+							"role": {
+								"type": "string",
+								"description": "The role of the author of this message."
+							}
+						}
+					},
+					"finish_reason": {
+						"$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
+						"description": "The reason why the generation was stopped."
+					},
+					"index": {
+						"type": "integer",
+						"description": "The index of the choice in the list of choices."
+					}
+				},
+				"required": ["delta", "index"]
+			}
+		},
+		"created": {
+			"type": "integer",
+			"description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
+		}
+	},
+	"required": ["choices", "created"]
+}

packages/tasks/src/tasks/index.ts CHANGED Viewed

@@ -38,6 +38,17 @@ import zeroShotObjectDetection from "./zero-shot-object-detection/data";
 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
 export type * from "./document-question-answering/inference";
 export type * from "./feature-extraction/inference";
 export type * from "./fill-mask/inference";
@@ -73,14 +84,14 @@ export type {
 	TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
-	FinishReason,
-	PrefillToken,
 	TextGenerationInput,
 	TextGenerationOutput,
 	TextGenerationOutputDetails,
 	TextGenerationParameters,
-	TextGenerationSequenceDetails,
-	Token,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";
 export type * from "./visual-question-answering/inference";

 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
+export type {
+	ChatCompletionInput,
+	ChatCompletionInputMessage,
+	ChatCompletionOutput,
+	ChatCompletionOutputChoice,
+	ChatCompletionFinishReason,
+	ChatCompletionOutputChoiceMessage,
+	ChatCompletionStreamOutput,
+	ChatCompletionStreamOutputChoice,
+	ChatCompletionStreamOutputDelta,
+} from "./chat-completion/inference";
 export type * from "./document-question-answering/inference";
 export type * from "./feature-extraction/inference";
 export type * from "./fill-mask/inference";
 	TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
+	TextGenerationFinishReason,
+	TextGenerationPrefillToken,
 	TextGenerationInput,
 	TextGenerationOutput,
 	TextGenerationOutputDetails,
 	TextGenerationParameters,
+	TextGenerationOutputSequenceDetails,
+	TextGenerationOutputToken,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";
 export type * from "./visual-question-answering/inference";

packages/tasks/src/tasks/text-generation/inference.ts CHANGED Viewed

@@ -16,6 +16,10 @@ export interface TextGenerationInput {
 	 * Additional inference parameters
 	 */
 	parameters?: TextGenerationParameters;
 	[property: string]: unknown;
 }
@@ -114,16 +118,16 @@ export interface TextGenerationOutputDetails {
 	/**
 	 * Details about additional sequences when best_of is provided
 	 */
-	best_of_sequences?: TextGenerationSequenceDetails[];
 	/**
 	 * The reason why the generation was stopped.
 	 */
-	finish_reason: FinishReason;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
-	prefill: PrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
@@ -131,24 +135,25 @@ export interface TextGenerationOutputDetails {
 	/**
 	 * The generated tokens and associated details
 	 */
-	tokens: Token[];
 	[property: string]: unknown;
 }
-export interface TextGenerationSequenceDetails {
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: FinishReason;
 	/**
 	 * The generated text
 	 */
-	generated_text: number;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
-	prefill: PrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
@@ -156,20 +161,26 @@ export interface TextGenerationSequenceDetails {
 	/**
 	 * The generated tokens and associated details
 	 */
-	tokens: Token[];
 	[property: string]: unknown;
 }
 /**
- * The generated sequence reached the maximum allowed length
  *
- * The model generated an end-of-sentence (EOS) token
  *
- * One of the sequence in stop_sequences was generated
  */
-export type FinishReason = "length" | "eos_token" | "stop_sequence";
-export interface PrefillToken {
 	id: number;
 	logprob: number;
 	/**
@@ -179,9 +190,12 @@ export interface PrefillToken {
 	[property: string]: unknown;
 }
-export interface Token {
 	id: number;
-	logprob: number;
 	/**
 	 * Whether or not that token is a special one
 	 */
@@ -192,3 +206,45 @@ export interface Token {
 	text: string;
 	[property: string]: unknown;
 }

 	 * Additional inference parameters
 	 */
 	parameters?: TextGenerationParameters;
+	/**
+	 * Whether to stream output tokens
+	 */
+	stream?: boolean;
 	[property: string]: unknown;
 }
 	/**
 	 * Details about additional sequences when best_of is provided
 	 */
+	best_of_sequences?: TextGenerationOutputSequenceDetails[];
 	/**
 	 * The reason why the generation was stopped.
 	 */
+	finish_reason: TextGenerationFinishReason;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
+	prefill: TextGenerationPrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
 	/**
 	 * The generated tokens and associated details
 	 */
+	tokens: TextGenerationOutputToken[];
+	/**
+	 * Most likely tokens
+	 */
+	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
+export interface TextGenerationOutputSequenceDetails {
+	finish_reason: TextGenerationFinishReason;
 	/**
 	 * The generated text
 	 */
+	generated_text: string;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
+	prefill: TextGenerationPrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
 	/**
 	 * The generated tokens and associated details
 	 */
+	tokens: TextGenerationOutputToken[];
+	/**
+	 * Most likely tokens
+	 */
+	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
 /**
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
  *
+ * eos_token: The model generated an end-of-sequence (EOS) token
  *
+ * stop_sequence: One of the sequences in stop_sequences was generated
  */
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+export interface TextGenerationPrefillToken {
 	id: number;
 	logprob: number;
 	/**
 	[property: string]: unknown;
 }
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
 	id: number;
+	logprob?: number;
 	/**
 	 * Whether or not that token is a special one
 	 */
 	text: string;
 	[property: string]: unknown;
 }
+/**
+ * Text Generation Stream Output
+ */
+export interface TextGenerationStreamOutput {
+	/**
+	 * Generation details. Only available when the generation is finished.
+	 */
+	details?: TextGenerationStreamDetails;
+	/**
+	 * The complete generated text. Only available when the generation is finished.
+	 */
+	generated_text?: string;
+	/**
+	 * The token index within the stream. Optional to support older clients that omit it.
+	 */
+	index?: number;
+	/**
+	 * Generated token.
+	 */
+	token: TextGenerationOutputToken;
+	[property: string]: unknown;
+}
+/**
+ * Generation details. Only available when the generation is finished.
+ */
+export interface TextGenerationStreamDetails {
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason: TextGenerationFinishReason;
+	/**
+	 * The number of generated tokens
+	 */
+	generated_tokens: number;
+	/**
+	 * The random seed used for generation
+	 */
+	seed: number;
+	[property: string]: unknown;
+}

packages/tasks/src/tasks/text-generation/spec/input.json CHANGED Viewed

@@ -12,6 +12,10 @@
 		"parameters": {
 			"description": "Additional inference parameters",
 			"$ref": "#/$defs/TextGenerationParameters"
 		}
 	},
 	"$defs": {

 		"parameters": {
 			"description": "Additional inference parameters",
 			"$ref": "#/$defs/TextGenerationParameters"
+		},
+		"stream": {
+			"description": "Whether to stream output tokens",
+			"type": "boolean"
 		}
 	},
 	"$defs": {

packages/tasks/src/tasks/text-generation/spec/output.json CHANGED Viewed

@@ -10,43 +10,45 @@
 			"description": "The generated text"
 		},
 		"details": {
-			"description": "When enabled, details about the generation",
-			"title": "TextGenerationOutputDetails",
-			"allOf": [
-				{ "$ref": "#/$defs/SequenceDetails" },
-				{
-					"type": "object",
-					"properties": {
-						"best_of_sequences": {
-							"type": "array",
-							"description": "Details about additional sequences when best_of is provided",
-							"items": {
-								"allOf": [
-									{ "$ref": "#/$defs/SequenceDetails" },
-									{
-										"type": "object",
-										"properties": {
-											"generated_text": {
-												"type": "integer",
-												"description": "The generated text"
-											}
-										},
-										"required": ["generated_text"]
-									}
-								]
-							}
-						}
-					}
-				}
-			]
 		}
 	},
 	"required": ["generated_text"],
 	"$defs": {
 		"Token": {
 			"type": "object",
-			"title": "Token",
 			"properties": {
 				"id": {
 					"type": "integer"
@@ -63,20 +65,15 @@
 					"description": "The text associated with that token"
 				}
 			},
-			"required": ["id", "logprob", "special", "text"]
 		},
-		"SequenceDetails": {
 			"type": "object",
-			"title": "TextGenerationSequenceDetails",
 			"properties": {
 				"finish_reason": {
-					"type": "string",
-					"description": "The reason why the generation was stopped.",
-					"oneOf": [
-						{ "const": "length", "description": "The generated sequence reached the maximum allowed length" },
-						{ "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
-						{ "const": "stop_sequence", "description": "One of the sequence in stop_sequences was generated" }
-					]
 				},
 				"generated_tokens": {
 					"type": "integer",
@@ -85,21 +82,7 @@
 				"prefill": {
 					"type": "array",
 					"items": {
-						"title": "PrefillToken",
-						"type": "object",
-						"properties": {
-							"id": {
-								"type": "integer"
-							},
-							"logprob": {
-								"type": "number"
-							},
-							"text": {
-								"type": "string",
-								"description": "The text associated with that token"
-							}
-						},
-						"required": ["id", "logprob", "text"]
 					}
 				},
 				"seed": {
@@ -112,9 +95,71 @@
 					"items": {
 						"$ref": "#/$defs/Token"
 					}
 				}
 			},
 			"required": ["finish_reason", "generated_tokens", "prefill", "tokens"]
 		}
 	}
 }

 			"description": "The generated text"
 		},
 		"details": {
+			"$ref": "#/$defs/Details",
+			"description": "When enabled, details about the generation"
 		}
 	},
 	"required": ["generated_text"],
 	"$defs": {
+		"FinishReason": {
+			"type": "string",
+			"title": "TextGenerationFinishReason",
+			"description": "The reason why the generation was stopped.",
+			"oneOf": [
+				{ "const": "length", "description": "length: The generated sequence reached the maximum allowed length" },
+				{ "const": "eos_token", "description": "eos_token: The model generated an end-of-sequence (EOS) token" },
+				{
+					"const": "stop_sequence",
+					"description": "stop_sequence: One of the sequences in stop_sequences was generated"
+				}
+			]
+		},
+		"PrefillToken": {
+			"title": "TextGenerationPrefillToken",
+			"type": "object",
+			"properties": {
+				"id": {
+					"type": "integer"
+				},
+				"logprob": {
+					"type": "number"
+				},
+				"text": {
+					"type": "string",
+					"description": "The text associated with that token"
+				}
+			},
+			"required": ["id", "logprob", "text"]
+		},
 		"Token": {
 			"type": "object",
+			"title": "TextGenerationOutputToken",
 			"properties": {
 				"id": {
 					"type": "integer"
 					"description": "The text associated with that token"
 				}
 			},
+			"required": ["id", "special", "text"]
 		},
+		"Details": {
 			"type": "object",
+			"title": "TextGenerationOutputDetails",
 			"properties": {
 				"finish_reason": {
+					"$ref": "#/$defs/FinishReason",
+					"description": "The reason why the generation was stopped."
 				},
 				"generated_tokens": {
 					"type": "integer",
 				"prefill": {
 					"type": "array",
 					"items": {
+						"$ref": "#/$defs/PrefillToken"
 					}
 				},
 				"seed": {
 					"items": {
 						"$ref": "#/$defs/Token"
 					}
+				},
+				"top_tokens": {
+					"type": "array",
+					"description": "Most likely tokens",
+					"items": {
+						"type": "array",
+						"items": {
+							"$ref": "#/$defs/Token"
+						}
+					}
+				},
+				"best_of_sequences": {
+					"type": "array",
+					"description": "Details about additional sequences when best_of is provided",
+					"items": {
+						"$ref": "#/$defs/SequenceDetails"
+					}
 				}
 			},
 			"required": ["finish_reason", "generated_tokens", "prefill", "tokens"]
+		},
+		"SequenceDetails": {
+			"type": "object",
+			"title": "TextGenerationOutputSequenceDetails",
+			"properties": {
+				"generated_text": {
+					"type": "string",
+					"description": "The generated text"
+				},
+				"finish_reason": {
+					"$ref": "#/$defs/FinishReason"
+				},
+				"generated_tokens": {
+					"type": "integer",
+					"description": "The number of generated tokens"
+				},
+				"prefill": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/PrefillToken"
+					}
+				},
+				"seed": {
+					"type": "integer",
+					"description": "The random seed used for generation"
+				},
+				"tokens": {
+					"type": "array",
+					"description": "The generated tokens and associated details",
+					"items": {
+						"$ref": "#/$defs/Token"
+					}
+				},
+				"top_tokens": {
+					"type": "array",
+					"description": "Most likely tokens",
+					"items": {
+						"type": "array",
+						"items": {
+							"$ref": "#/$defs/Token"
+						}
+					}
+				}
+			},
+			"required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"]
 		}
 	}
 }

packages/tasks/src/tasks/text-generation/spec/output_stream.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+	"$id": "/inference/schemas/text-generation/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Text Generation Stream Output",
+	"title": "TextGenerationStreamOutput",
+	"type": "object",
+	"properties": {
+		"token": {
+			"$ref": "#/$defs/Token",
+			"description": "Generated token."
+		},
+		"index": {
+			"type": "integer",
+			"description": "The token index within the stream. Optional to support older clients that omit it."
+		},
+		"generated_text": {
+			"type": "string",
+			"description": "The complete generated text. Only available when the generation is finished."
+		},
+		"details": {
+			"$ref": "#/$defs/StreamDetails",
+			"description": "Generation details. Only available when the generation is finished."
+		}
+	},
+	"required": ["token"],
+	"$defs": {
+		"StreamDetails": {
+			"type": "object",
+			"title": "TextGenerationStreamDetails",
+			"properties": {
+				"finish_reason": {
+					"$ref": "#/$defs/FinishReason",
+					"description": "The reason why the generation was stopped."
+				},
+				"generated_tokens": {
+					"type": "integer",
+					"description": "The number of generated tokens"
+				},
+				"seed": {
+					"type": "integer",
+					"description": "The random seed used for generation"
+				}
+			},
+			"required": ["finish_reason", "generated_tokens", "seed"]
+		}
+	}
+}