import { InferenceOutputError } from "../../lib/InferenceOutputError";
import type { BaseArgs, Options } from "../../types";
import { request } from "../custom/request";

/**
 * Inputs for Text Generation inference
 */
export interface TextGenerationInput {
	/**
	 * The text to initialize generation with
	 */
	inputs: string;
	/**
	 * Additional inference parameters
	 */
	parameters?: TextGenerationParameters;
	/**
	 * Whether to stream output tokens
	 */
	stream?: boolean;
	[property: string]: unknown;
}
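
// Illustrative sketch, not part of the original module: a minimal input object
// for the interface above. The prompt text and parameter values are assumptions
// chosen purely for demonstration.
//
// const exampleInput: TextGenerationInput = {
//   inputs: "The answer to the universe is",
//   parameters: { max_new_tokens: 20, temperature: 0.7 },
// };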

/**
 * Additional inference parameters
 *
 * Additional inference parameters for Text Generation
 */
export interface TextGenerationParameters {
	/**
	 * The number of sampling queries to run. Only the best one (in terms of total logprob) will
	 * be returned.
	 */
	best_of?: number;
	/**
	 * Whether or not to output decoder input details
	 */
	decoder_input_details?: boolean;
	/**
	 * Whether or not to output details
	 */
	details?: boolean;
	/**
	 * Whether to use logits sampling instead of greedy decoding when generating new tokens.
	 */
	do_sample?: boolean;
	/**
	 * The maximum number of tokens to generate.
	 */
	max_new_tokens?: number;
	/**
	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
	 * paper](https://hf.co/papers/1909.05858) for more details.
	 */
	repetition_penalty?: number;
	/**
	 * Whether to prepend the prompt to the generated text.
	 */
	return_full_text?: boolean;
	/**
	 * The random sampling seed.
	 */
	seed?: number;
	/**
	 * Stop generating tokens if a member of `stop_sequences` is generated.
	 */
	stop_sequences?: string[];
	/**
	 * The value used to modulate the logits distribution.
	 */
	temperature?: number;
	/**
	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
	 */
	top_k?: number;
	/**
	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
	 * up to `top_p` or higher are kept for generation.
	 */
	top_p?: number;
	/**
	 * Truncate input tokens to the given size.
	 */
	truncate?: number;
	/**
	 * Typical Decoding mass. See [Typical Decoding for Natural Language
	 * Generation](https://hf.co/papers/2202.00666) for more information
	 */
	typical_p?: number;
	/**
	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
	 */
	watermark?: boolean;
	[property: string]: unknown;
}
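
// Illustrative sketch, not part of the original module: a typical sampling
// configuration built from the parameters above. All values are arbitrary
// examples, not recommended defaults.
//
// const exampleParams: TextGenerationParameters = {
//   do_sample: true,          // sample from the logits instead of greedy decoding
//   temperature: 0.8,         // soften the logits distribution
//   top_k: 50,                // keep only the 50 most likely tokens
//   top_p: 0.95,              // nucleus (top-p) filtering threshold
//   max_new_tokens: 64,       // cap on the number of generated tokens
//   stop_sequences: ["\n\n"], // stop once a blank line is generated
// };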

/**
 * Outputs for Text Generation inference
 */
export interface TextGenerationOutput {
	/**
	 * When enabled, details about the generation
	 */
	details?: TextGenerationOutputDetails;
	/**
	 * The generated text
	 */
	generated_text: string;
	[property: string]: unknown;
}

/**
 * When enabled, details about the generation
 */
export interface TextGenerationOutputDetails {
	/**
	 * Details about additional sequences when best_of is provided
	 */
	best_of_sequences?: TextGenerationOutputSequenceDetails[];
	/**
	 * The reason why the generation was stopped.
	 */
	finish_reason: TextGenerationFinishReason;
	/**
	 * The number of generated tokens
	 */
	generated_tokens: number;
	prefill: TextGenerationPrefillToken[];
	/**
	 * The random seed used for generation
	 */
	seed?: number;
	/**
	 * The generated tokens and associated details
	 */
	tokens: TextGenerationOutputToken[];
	/**
	 * Most likely tokens
	 */
	top_tokens?: Array<TextGenerationOutputToken[]>;
	[property: string]: unknown;
}

export interface TextGenerationOutputSequenceDetails {
	finish_reason: TextGenerationFinishReason;
	/**
	 * The generated text
	 */
	generated_text: string;
	/**
	 * The number of generated tokens
	 */
	generated_tokens: number;
	prefill: TextGenerationPrefillToken[];
	/**
	 * The random seed used for generation
	 */
	seed?: number;
	/**
	 * The generated tokens and associated details
	 */
	tokens: TextGenerationOutputToken[];
	/**
	 * Most likely tokens
	 */
	top_tokens?: Array<TextGenerationOutputToken[]>;
	[property: string]: unknown;
}

export interface TextGenerationPrefillToken {
	id: number;
	logprob: number;
	/**
	 * The text associated with that token
	 */
	text: string;
	[property: string]: unknown;
}

/**
 * Generated token.
 */
export interface TextGenerationOutputToken {
	id: number;
	logprob?: number;
	/**
	 * Whether or not that token is a special one
	 */
	special: boolean;
	/**
	 * The text associated with that token
	 */
	text: string;
	[property: string]: unknown;
}

/**
 * The reason why the generation was stopped.
 *
 * length: The generated sequence reached the maximum allowed length
 *
 * eos_token: The model generated an end-of-sentence (EOS) token
 *
 * stop_sequence: One of the sequences in `stop_sequences` was generated
 */
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";

/**
 * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
 */
export async function textGeneration(
	args: BaseArgs & TextGenerationInput,
	options?: Options
): Promise<TextGenerationOutput> {
	const res = await request<TextGenerationOutput[]>(args, {
		...options,
		taskHint: "text-generation",
	});
	// Validate that the API returned a non-empty array of generations before
	// returning the first one; an empty array would otherwise yield undefined.
	const isValidOutput =
		Array.isArray(res) && res.length > 0 && res.every((x) => typeof x?.generated_text === "string");
	if (!isValidOutput) {
		throw new InferenceOutputError("Expected Array<{generated_text: string}>");
	}
	return res[0];
}
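
// Illustrative usage sketch, not part of the original module. The access token
// is a placeholder and the prompt is an arbitrary example; "gpt2" follows the
// recommendation in the doc comment above.
//
// async function demo(): Promise<void> {
//   const { generated_text } = await textGeneration({
//     accessToken: "hf_...", // placeholder: supply a real Hugging Face token
//     model: "gpt2",
//     inputs: "The answer to the universe is",
//     parameters: { max_new_tokens: 20 },
//   });
//   console.log(generated_text);
// }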