inference-widgets / packages /tasks /src /tasks /common-definitions.json
machineuser
Sync widgets demo
a6b2d88
raw
history blame
4.78 kB
{
"$id": "/inference/schemas/common-definitions.json",
"$schema": "http://json-schema.org/draft-06/schema#",
"description": "(Incomplete!) Common type definitions shared by several tasks",
"definitions": {
"ClassificationOutputTransform": {
"title": "ClassificationOutputTransform",
"type": "string",
"description": "The function to apply to the model outputs in order to retrieve the scores.",
"oneOf": [
{
"const": "sigmoid"
},
{
"const": "softmax"
},
{
"const": "none"
}
]
},
"ClassificationOutput": {
"title": "ClassificationOutput",
"type": "object",
"properties": {
"label": {
"type": "string",
"description": "The predicted class label."
},
"score": {
"type": "number",
"description": "The corresponding probability."
}
},
"required": ["label", "score"]
},
"GenerationParameters": {
"title": "GenerationParameters",
"description": "Ad-hoc parametrization of the text generation process",
"type": "object",
"properties": {
"temperature": {
"type": "number",
"description": "The value used to modulate the next token probabilities."
},
"top_k": {
"type": "integer",
"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
},
"top_p": {
"type": "number",
"description": "If set to a float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
},
"typical_p": {
"type": "number",
"description": "Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to a float < 1, only the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
},
"epsilon_cutoff": {
"type": "number",
"description": "If set to a float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
},
"eta_cutoff": {
"type": "number",
"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to a float strictly between 0 and 1, a token is only considered if its probability is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
},
"max_length": {
"type": "integer",
"description": "The maximum length (in tokens) of the generated text, including the input."
},
"max_new_tokens": {
"type": "integer",
"description": "The maximum number of tokens to generate. Takes precedence over max_length."
},
"min_length": {
"type": "integer",
"description": "The minimum length (in tokens) of the generated text, including the input."
},
"min_new_tokens": {
"type": "integer",
"description": "The minimum number of tokens to generate. Takes precedence over min_length."
},
"do_sample": {
"type": "boolean",
"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
},
"early_stopping": {
"description": "Controls the stopping condition for beam-based methods.",
"oneOf": [
{
"type": "boolean"
},
{
"const": "never",
"type": "string"
}
]
},
"num_beams": {
"type": "integer",
"description": "Number of beams to use for beam search."
},
"num_beam_groups": {
"type": "integer",
"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
},
"penalty_alpha": {
"type": "number",
"description": "The value that balances the model confidence and the degeneration penalty in contrastive search decoding."
},
"use_cache": {
"type": "boolean",
"description": "Whether the model should use the cached key/value attention states from previous decoding steps to speed up decoding."
}
}
}
}
}