Commit 0f711ba (parent: a1bb091), committed by machineuser

Sync widgets demo

packages/tasks/scripts/inference-codegen.ts CHANGED
@@ -54,6 +54,12 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles:
 		name: `${taskId}-output`,
 		schema: await fs.readFile(`${taskSpecDir}/output.json`, { encoding: "utf-8" }),
 	});
+	if (taskId === "text-generation" || taskId === "chat-completion") {
+		await schema.addSource({
+			name: `${taskId}-stream-output`,
+			schema: await fs.readFile(`${taskSpecDir}/output_stream.json`, { encoding: "utf-8" }),
+		});
+	}
 	const inputData = new InputData();
 	inputData.addInput(schema);
 	return inputData;

packages/tasks/src/tasks/chat-completion/inference.ts ADDED
@@ -0,0 +1,158 @@
+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+
+/**
+ * Inputs for ChatCompletion inference
+ */
+export interface ChatCompletionInput {
+	/**
+	 * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+	 * frequency in the text so far, decreasing the model's likelihood to repeat the same line
+	 * verbatim.
+	 */
+	frequency_penalty?: number;
+	/**
+	 * The maximum number of tokens that can be generated in the chat completion.
+	 */
+	max_tokens?: number;
+	messages: ChatCompletionInputMessage[];
+	/**
+	 * The random sampling seed.
+	 */
+	seed?: number;
+	/**
+	 * Stop generating tokens if a stop token is generated.
+	 */
+	stop?: ChatCompletionInputStopReason;
+	/**
+	 * If set, partial message deltas will be sent.
+	 */
+	stream?: boolean;
+	/**
+	 * The value used to modulate the logits distribution.
+	 */
+	temperature?: number;
+	/**
+	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+	 * up to `top_p` or higher are kept for generation.
+	 */
+	top_p?: number;
+	[property: string]: unknown;
+}
+
+export interface ChatCompletionInputMessage {
+	/**
+	 * The content of the message.
+	 */
+	content: string;
+	role: ChatCompletionMessageRole;
+	[property: string]: unknown;
+}
+
+/**
+ * The role of the message author.
+ */
+export type ChatCompletionMessageRole = "assistant" | "system" | "user";
+
+/**
+ * Stop generating tokens if a stop token is generated.
+ */
+export type ChatCompletionInputStopReason = string[] | string;
+
+/**
+ * Outputs for Chat Completion inference
+ */
+export interface ChatCompletionOutput {
+	/**
+	 * A list of chat completion choices.
+	 */
+	choices: ChatCompletionOutputChoice[];
+	/**
+	 * The Unix timestamp (in seconds) of when the chat completion was created.
+	 */
+	created: number;
+	[property: string]: unknown;
+}
+
+export interface ChatCompletionOutputChoice {
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason: ChatCompletionFinishReason;
+	/**
+	 * The index of the choice in the list of choices.
+	 */
+	index: number;
+	message: ChatCompletionOutputChoiceMessage;
+	[property: string]: unknown;
+}
+
+/**
+ * The reason why the generation was stopped.
+ *
+ * The generated sequence reached the maximum allowed length
+ *
+ * The model generated an end-of-sentence (EOS) token
+ *
+ * One of the sequences in stop_sequences was generated
+ */
+export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
+
+export interface ChatCompletionOutputChoiceMessage {
+	/**
+	 * The content of the chat completion message.
+	 */
+	content: string;
+	role: ChatCompletionMessageRole;
+	[property: string]: unknown;
+}
+
+/**
+ * Chat Completion Stream Output
+ */
+export interface ChatCompletionStreamOutput {
+	/**
+	 * A list of chat completion choices.
+	 */
+	choices: ChatCompletionStreamOutputChoice[];
+	/**
+	 * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+	 * the same timestamp.
+	 */
+	created: number;
+	[property: string]: unknown;
+}
+
+export interface ChatCompletionStreamOutputChoice {
+	/**
+	 * A chat completion delta generated by streamed model responses.
+	 */
+	delta: ChatCompletionStreamOutputDelta;
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason?: ChatCompletionFinishReason;
+	/**
+	 * The index of the choice in the list of choices.
+	 */
+	index: number;
+	[property: string]: unknown;
+}
+
+/**
+ * A chat completion delta generated by streamed model responses.
+ */
+export interface ChatCompletionStreamOutputDelta {
+	/**
+	 * The contents of the chunk message.
+	 */
+	content?: string;
+	/**
+	 * The role of the author of this message.
+	 */
+	role?: string;
+	[property: string]: unknown;
+}

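For orientation, here is a minimal sketch of how the generated chat-completion types above might be used from TypeScript. Only the types come from this commit; the `chatComplete` helper and the endpoint URL are hypothetical placeholders, and the package root is assumed to be `@huggingface/tasks`.

```ts
import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tasks";

// Hypothetical helper: POST a typed payload to some chat-completion endpoint.
async function chatComplete(url: string, input: ChatCompletionInput): Promise<ChatCompletionOutput> {
	const response = await fetch(url, {
		method: "POST",
		headers: { "Content-Type": "application/json" },
		body: JSON.stringify(input),
	});
	return (await response.json()) as ChatCompletionOutput;
}

async function demo(): Promise<void> {
	const input: ChatCompletionInput = {
		messages: [
			{ role: "system", content: "You are a helpful assistant." },
			{ role: "user", content: "What is streaming inference?" },
		],
		max_tokens: 256,
		temperature: 0.7,
	};
	const output = await chatComplete("https://example.com/v1/chat/completions", input);
	const { message, finish_reason } = output.choices[0];
	console.log(message.content, finish_reason);
}
```
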
packages/tasks/src/tasks/chat-completion/spec/input.json ADDED
@@ -0,0 +1,63 @@
+{
+	"title": "ChatCompletionInput",
+	"$id": "/inference/schemas/chat-completion/input.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Inputs for ChatCompletion inference",
+	"type": "object",
+	"properties": {
+		"messages": {
+			"type": "array",
+			"title": "ChatCompletionInputMessage",
+			"items": {
+				"type": "object",
+				"properties": {
+					"role": {
+						"$ref": "#/definitions/Role"
+					},
+					"content": {
+						"type": "string",
+						"description": "The content of the message."
+					}
+				},
+				"required": ["role", "content"]
+			}
+		},
+		"frequency_penalty": {
+			"type": "number",
+			"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
+		},
+		"max_tokens": {
+			"type": "integer",
+			"description": "The maximum number of tokens that can be generated in the chat completion."
+		},
+		"seed": {
+			"type": "integer",
+			"description": "The random sampling seed."
+		},
+		"stop": {
+			"oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
+			"title": "ChatCompletionInputStopReason",
+			"description": "Stop generating tokens if a stop token is generated."
+		},
+		"stream": {
+			"type": "boolean",
+			"description": "If set, partial message deltas will be sent."
+		},
+		"temperature": {
+			"type": "number",
+			"description": "The value used to modulate the logits distribution."
+		},
+		"top_p": {
+			"type": "number",
+			"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
+		}
+	},
+	"required": ["messages"],
+	"definitions": {
+		"Role": {
+			"oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
+			"title": "ChatCompletionMessageRole",
+			"description": "The role of the message author."
+		}
+	}
+}

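One detail worth noting in the spec above: `stop` is a `oneOf` that accepts either a single string or an array of strings. Both of the following illustrative instances typecheck against the generated `ChatCompletionInput` (values invented):

```ts
import type { ChatCompletionInput } from "@huggingface/tasks";

// `stop` as a single stop string…
const single: ChatCompletionInput = {
	messages: [{ role: "user", content: "Hi" }],
	stop: "###",
};

// …or as a list of stop strings; both satisfy ChatCompletionInputStopReason.
const multiple: ChatCompletionInput = {
	messages: [{ role: "user", content: "Hi" }],
	stop: ["###", "\n\n"],
};
```
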
packages/tasks/src/tasks/chat-completion/spec/output.json ADDED
@@ -0,0 +1,58 @@
+{
+	"$id": "/inference/schemas/chat-completion/output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Outputs for Chat Completion inference",
+	"title": "ChatCompletionOutput",
+	"type": "object",
+	"properties": {
+		"choices": {
+			"type": "array",
+			"description": "A list of chat completion choices.",
+			"title": "ChatCompletionOutputChoice",
+			"items": {
+				"type": "object",
+				"properties": {
+					"finish_reason": {
+						"$ref": "#/definitions/FinishReason",
+						"description": "The reason why the generation was stopped."
+					},
+					"index": {
+						"type": "integer",
+						"description": "The index of the choice in the list of choices."
+					},
+					"message": {
+						"type": "object",
+						"properties": {
+							"role": {
+								"$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
+							},
+							"content": {
+								"type": "string",
+								"description": "The content of the chat completion message."
+							}
+						},
+						"title": "ChatCompletionOutputChoiceMessage",
+						"required": ["content", "role"]
+					}
+				},
+				"required": ["finish_reason", "index", "message"]
+			}
+		},
+		"created": {
+			"type": "integer",
+			"description": "The Unix timestamp (in seconds) of when the chat completion was created."
+		}
+	},
+	"required": ["choices", "created"],
+	"definitions": {
+		"FinishReason": {
+			"type": "string",
+			"title": "ChatCompletionFinishReason",
+			"oneOf": [
+				{ "const": "length", "description": "The generated sequence reached the maximum allowed length" },
+				{ "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
+				{ "const": "stop_sequence", "description": "One of the sequences in stop_sequences was generated" }
+			]
+		}
+	}
+}

packages/tasks/src/tasks/chat-completion/spec/output_stream.json ADDED
@@ -0,0 +1,48 @@
+{
+	"$id": "/inference/schemas/chat-completion/output_stream.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Chat Completion Stream Output",
+	"title": "ChatCompletionStreamOutput",
+	"type": "object",
+	"properties": {
+		"choices": {
+			"type": "array",
+			"title": "ChatCompletionStreamOutputChoice",
+			"description": "A list of chat completion choices.",
+			"items": {
+				"type": "object",
+				"properties": {
+					"delta": {
+						"type": "object",
+						"title": "ChatCompletionStreamOutputDelta",
+						"description": "A chat completion delta generated by streamed model responses.",
+						"properties": {
+							"content": {
+								"type": "string",
+								"description": "The contents of the chunk message."
+							},
+							"role": {
+								"type": "string",
+								"description": "The role of the author of this message."
+							}
+						}
+					},
+					"finish_reason": {
+						"$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
+						"description": "The reason why the generation was stopped."
+					},
+					"index": {
+						"type": "integer",
+						"description": "The index of the choice in the list of choices."
+					}
+				},
+				"required": ["delta", "index"]
+			}
+		},
+		"created": {
+			"type": "integer",
+			"description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
+		}
+	},
+	"required": ["choices", "created"]
+}

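A sketch of how a client might fold these stream chunks back into a complete message. The `chunks` async iterable is hypothetical (in practice it would come from parsing a server-sent-events response); only `ChatCompletionStreamOutput` comes from this commit:

```ts
import type { ChatCompletionStreamOutput } from "@huggingface/tasks";

// Accumulate streamed deltas into the final assistant message.
async function collectMessage(chunks: AsyncIterable<ChatCompletionStreamOutput>): Promise<string> {
	let content = "";
	for await (const chunk of chunks) {
		for (const choice of chunk.choices) {
			content += choice.delta.content ?? "";
			if (choice.finish_reason) {
				// "length" | "eos_token" | "stop_sequence"; only set once a choice is done.
				console.log(`finish_reason: ${choice.finish_reason}`);
			}
		}
	}
	return content;
}
```
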
packages/tasks/src/tasks/index.ts CHANGED
@@ -38,6 +38,17 @@ import zeroShotObjectDetection from "./zero-shot-object-detection/data";
 
 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
+export type {
+	ChatCompletionInput,
+	ChatCompletionInputMessage,
+	ChatCompletionOutput,
+	ChatCompletionOutputChoice,
+	ChatCompletionFinishReason,
+	ChatCompletionOutputChoiceMessage,
+	ChatCompletionStreamOutput,
+	ChatCompletionStreamOutputChoice,
+	ChatCompletionStreamOutputDelta,
+} from "./chat-completion/inference";
 export type * from "./document-question-answering/inference";
 export type * from "./feature-extraction/inference";
 export type * from "./fill-mask/inference";
@@ -73,14 +84,14 @@ export type {
 	TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
-	FinishReason,
-	PrefillToken,
+	TextGenerationFinishReason,
+	TextGenerationPrefillToken,
 	TextGenerationInput,
 	TextGenerationOutput,
 	TextGenerationOutputDetails,
 	TextGenerationParameters,
-	TextGenerationSequenceDetails,
-	Token,
+	TextGenerationOutputSequenceDetails,
+	TextGenerationOutputToken,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";
 export type * from "./visual-question-answering/inference";

packages/tasks/src/tasks/text-generation/inference.ts CHANGED
@@ -16,6 +16,10 @@ export interface TextGenerationInput {
 	 * Additional inference parameters
 	 */
 	parameters?: TextGenerationParameters;
+	/**
+	 * Whether to stream output tokens
+	 */
+	stream?: boolean;
 	[property: string]: unknown;
 }
 
@@ -114,16 +118,16 @@ export interface TextGenerationOutputDetails {
 	/**
 	 * Details about additional sequences when best_of is provided
 	 */
-	best_of_sequences?: TextGenerationSequenceDetails[];
+	best_of_sequences?: TextGenerationOutputSequenceDetails[];
 	/**
 	 * The reason why the generation was stopped.
 	 */
-	finish_reason: FinishReason;
+	finish_reason: TextGenerationFinishReason;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
-	prefill: PrefillToken[];
+	prefill: TextGenerationPrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
@@ -131,24 +135,25 @@ export interface TextGenerationOutputDetails {
 	/**
 	 * The generated tokens and associated details
 	 */
-	tokens: Token[];
+	tokens: TextGenerationOutputToken[];
+	/**
+	 * Most likely tokens
+	 */
+	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
 
-export interface TextGenerationSequenceDetails {
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: FinishReason;
+export interface TextGenerationOutputSequenceDetails {
+	finish_reason: TextGenerationFinishReason;
 	/**
 	 * The generated text
 	 */
-	generated_text: number;
+	generated_text: string;
 	/**
 	 * The number of generated tokens
 	 */
 	generated_tokens: number;
-	prefill: PrefillToken[];
+	prefill: TextGenerationPrefillToken[];
 	/**
 	 * The random seed used for generation
 	 */
@@ -156,20 +161,26 @@ export interface TextGenerationSequenceDetails {
 	/**
 	 * The generated tokens and associated details
 	 */
-	tokens: Token[];
+	tokens: TextGenerationOutputToken[];
+	/**
+	 * Most likely tokens
+	 */
+	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
 
 /**
- * The generated sequence reached the maximum allowed length
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
  *
- * The model generated an end-of-sentence (EOS) token
+ * eos_token: The model generated an end-of-sentence (EOS) token
  *
- * One of the sequence in stop_sequences was generated
+ * stop_sequence: One of the sequences in stop_sequences was generated
  */
-export type FinishReason = "length" | "eos_token" | "stop_sequence";
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
 
-export interface PrefillToken {
+export interface TextGenerationPrefillToken {
 	id: number;
 	logprob: number;
 	/**
@@ -179,9 +190,12 @@ export interface PrefillToken {
 	[property: string]: unknown;
 }
 
-export interface Token {
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
 	id: number;
-	logprob: number;
+	logprob?: number;
 	/**
 	 * Whether or not that token is a special one
 	 */
@@ -192,3 +206,45 @@ export interface Token {
 	text: string;
 	[property: string]: unknown;
 }
+
+/**
+ * Text Generation Stream Output
+ */
+export interface TextGenerationStreamOutput {
+	/**
+	 * Generation details. Only available when the generation is finished.
+	 */
+	details?: TextGenerationStreamDetails;
+	/**
+	 * The complete generated text. Only available when the generation is finished.
+	 */
+	generated_text?: string;
+	/**
+	 * The token index within the stream. Optional to support older clients that omit it.
+	 */
+	index?: number;
+	/**
+	 * Generated token.
+	 */
+	token: TextGenerationOutputToken;
+	[property: string]: unknown;
+}
+
+/**
+ * Generation details. Only available when the generation is finished.
+ */
+export interface TextGenerationStreamDetails {
+	/**
+	 * The reason why the generation was stopped.
+	 */
+	finish_reason: TextGenerationFinishReason;
+	/**
+	 * The number of generated tokens
+	 */
+	generated_tokens: number;
+	/**
+	 * The random seed used for generation
+	 */
+	seed: number;
+	[property: string]: unknown;
+}

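A sketch of consuming the new `TextGenerationStreamOutput` chunks; as above, the chunk source is hypothetical and only the types are from this commit. Since `details` is only set on the final chunk, it doubles as an end-of-stream signal:

```ts
import type { TextGenerationStreamOutput } from "@huggingface/tasks";

async function printStream(chunks: AsyncIterable<TextGenerationStreamOutput>): Promise<void> {
	for await (const chunk of chunks) {
		// Skip special tokens (e.g. an EOS marker) when rendering text.
		if (!chunk.token.special) {
			process.stdout.write(chunk.token.text);
		}
		// `details` and `generated_text` are only present on the final chunk.
		if (chunk.details) {
			console.log(`\n[${chunk.details.finish_reason}, ${chunk.details.generated_tokens} tokens]`);
		}
	}
}
```
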
packages/tasks/src/tasks/text-generation/spec/input.json CHANGED
@@ -12,6 +12,10 @@
 		"parameters": {
 			"description": "Additional inference parameters",
 			"$ref": "#/$defs/TextGenerationParameters"
+		},
+		"stream": {
+			"description": "Whether to stream output tokens",
+			"type": "boolean"
 		}
 	},
 	"$defs": {

packages/tasks/src/tasks/text-generation/spec/output.json CHANGED
@@ -10,43 +10,45 @@
 			"description": "The generated text"
 		},
 		"details": {
-			"description": "When enabled, details about the generation",
-			"title": "TextGenerationOutputDetails",
-			"allOf": [
-				{ "$ref": "#/$defs/SequenceDetails" },
-				{
-					"type": "object",
-					"properties": {
-						"best_of_sequences": {
-							"type": "array",
-							"description": "Details about additional sequences when best_of is provided",
-							"items": {
-								"allOf": [
-									{ "$ref": "#/$defs/SequenceDetails" },
-									{
-										"type": "object",
-										"properties": {
-											"generated_text": {
-												"type": "integer",
-												"description": "The generated text"
-											}
-										},
-										"required": ["generated_text"]
-									}
-								]
-							}
-						}
-					}
-				}
-			]
+			"$ref": "#/$defs/Details",
+			"description": "When enabled, details about the generation"
 		}
 	},
 	"required": ["generated_text"],
-
 	"$defs": {
+		"FinishReason": {
+			"type": "string",
+			"title": "TextGenerationFinishReason",
+			"description": "The reason why the generation was stopped.",
+			"oneOf": [
+				{ "const": "length", "description": "length: The generated sequence reached the maximum allowed length" },
+				{ "const": "eos_token", "description": "eos_token: The model generated an end-of-sentence (EOS) token" },
+				{
+					"const": "stop_sequence",
+					"description": "stop_sequence: One of the sequences in stop_sequences was generated"
+				}
+			]
+		},
+		"PrefillToken": {
+			"title": "TextGenerationPrefillToken",
+			"type": "object",
+			"properties": {
+				"id": {
+					"type": "integer"
+				},
+				"logprob": {
+					"type": "number"
+				},
+				"text": {
+					"type": "string",
+					"description": "The text associated with that token"
+				}
+			},
+			"required": ["id", "logprob", "text"]
+		},
 		"Token": {
 			"type": "object",
+			"title": "TextGenerationOutputToken",
 			"properties": {
 				"id": {
 					"type": "integer"
@@ -63,20 +65,15 @@
 					"description": "The text associated with that token"
 				}
 			},
-			"required": ["id", "logprob", "special", "text"]
+			"required": ["id", "special", "text"]
 		},
-		"SequenceDetails": {
+		"Details": {
 			"type": "object",
-			"title": "TextGenerationSequenceDetails",
+			"title": "TextGenerationOutputDetails",
 			"properties": {
 				"finish_reason": {
-					"type": "string",
-					"description": "The reason why the generation was stopped.",
-					"oneOf": [
-						{ "const": "length", "description": "The generated sequence reached the maximum allowed length" },
-						{ "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
-						{ "const": "stop_sequence", "description": "One of the sequence in stop_sequences was generated" }
-					]
+					"$ref": "#/$defs/FinishReason",
+					"description": "The reason why the generation was stopped."
 				},
 				"generated_tokens": {
 					"type": "integer",
@@ -85,21 +82,7 @@
 				"prefill": {
 					"type": "array",
 					"items": {
-						"title": "PrefillToken",
-						"type": "object",
-						"properties": {
-							"id": {
-								"type": "integer"
-							},
-							"logprob": {
-								"type": "number"
-							},
-							"text": {
-								"type": "string",
-								"description": "The text associated with that token"
-							}
-						},
-						"required": ["id", "logprob", "text"]
+						"$ref": "#/$defs/PrefillToken"
 					}
 				},
 				"seed": {
@@ -112,9 +95,71 @@
 					"items": {
 						"$ref": "#/$defs/Token"
 					}
+				},
+				"top_tokens": {
+					"type": "array",
+					"description": "Most likely tokens",
+					"items": {
+						"type": "array",
+						"items": {
+							"$ref": "#/$defs/Token"
+						}
+					}
+				},
+				"best_of_sequences": {
+					"type": "array",
+					"description": "Details about additional sequences when best_of is provided",
+					"items": {
+						"$ref": "#/$defs/SequenceDetails"
+					}
 				}
 			},
 			"required": ["finish_reason", "generated_tokens", "prefill", "tokens"]
+		},
+		"SequenceDetails": {
+			"type": "object",
+			"title": "TextGenerationOutputSequenceDetails",
+			"properties": {
+				"generated_text": {
+					"type": "string",
+					"description": "The generated text"
+				},
+				"finish_reason": {
+					"$ref": "#/$defs/FinishReason"
+				},
+				"generated_tokens": {
+					"type": "integer",
+					"description": "The number of generated tokens"
+				},
+				"prefill": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/PrefillToken"
+					}
+				},
+				"seed": {
+					"type": "integer",
+					"description": "The random seed used for generation"
+				},
+				"tokens": {
+					"type": "array",
+					"description": "The generated tokens and associated details",
+					"items": {
+						"$ref": "#/$defs/Token"
+					}
+				},
+				"top_tokens": {
+					"type": "array",
+					"description": "Most likely tokens",
+					"items": {
+						"type": "array",
+						"items": {
+							"$ref": "#/$defs/Token"
+						}
+					}
+				}
+			},
+			"required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"]
 		}
 	}
 }

packages/tasks/src/tasks/text-generation/spec/output_stream.json ADDED
@@ -0,0 +1,47 @@
+{
+	"$id": "/inference/schemas/text-generation/output_stream.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Text Generation Stream Output",
+	"title": "TextGenerationStreamOutput",
+	"type": "object",
+	"properties": {
+		"token": {
+			"$ref": "/inference/schemas/text-generation/output.json#/$defs/Token",
+			"description": "Generated token."
+		},
+		"index": {
+			"type": "integer",
+			"description": "The token index within the stream. Optional to support older clients that omit it."
+		},
+		"generated_text": {
+			"type": "string",
+			"description": "The complete generated text. Only available when the generation is finished."
+		},
+		"details": {
+			"$ref": "#/$defs/StreamDetails",
+			"description": "Generation details. Only available when the generation is finished."
+		}
+	},
+	"required": ["token"],
+	"$defs": {
+		"StreamDetails": {
+			"type": "object",
+			"title": "TextGenerationStreamDetails",
+			"properties": {
+				"finish_reason": {
+					"$ref": "/inference/schemas/text-generation/output.json#/$defs/FinishReason",
+					"description": "The reason why the generation was stopped."
+				},
+				"generated_tokens": {
+					"type": "integer",
+					"description": "The number of generated tokens"
+				},
+				"seed": {
+					"type": "integer",
+					"description": "The random seed used for generation"
+				}
+			},
+			"required": ["finish_reason", "generated_tokens", "seed"]
+		}
+	}
+}

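For reference, two illustrative chunks (values invented) that conform to this stream schema: an intermediate chunk carries just the per-token fields, while the final chunk also carries `generated_text` and `details`:

```ts
import type { TextGenerationStreamOutput } from "@huggingface/tasks";

// Intermediate chunk: only `token` is required; `logprob` is optional per the Token def.
const intermediate: TextGenerationStreamOutput = {
	token: { id: 264, logprob: -0.31, special: false, text: " world" },
	index: 2,
};

// Final chunk: also carries the aggregate text and generation details.
const last: TextGenerationStreamOutput = {
	token: { id: 2, special: true, text: "</s>" },
	generated_text: "Hello world",
	details: { finish_reason: "eos_token", generated_tokens: 3, seed: 42 },
};
```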