Spaces:
Runtime error
Runtime error
| { | |
| "openapi": "3.0.3", | |
| "info": { | |
| "title": "Text Generation Inference", | |
| "description": "Text Generation Webserver", | |
| "contact": { | |
| "name": "Olivier Dehaene" | |
| }, | |
| "license": { | |
| "name": "Apache 2.0", | |
| "url": "https://www.apache.org/licenses/LICENSE-2.0" | |
| }, | |
| "version": "0.6.0" | |
| }, | |
| "paths": { | |
| "/": { | |
| "post": { | |
| "tags": [ | |
| "Text Generation Inference" | |
| ], | |
| "summary": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`", | |
| "description": "Generate tokens if `stream == false` or a stream of tokens if `stream == true`", | |
| "operationId": "compat_generate", | |
| "requestBody": { | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/CompatGenerateRequest" | |
| } | |
| } | |
| }, | |
| "required": true | |
| }, | |
| "responses": { | |
| "200": { | |
| "description": "See /generate or /generate_stream" | |
| }, | |
| "422": { | |
| "description": "Input validation error", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Input validation error" | |
| } | |
| } | |
| } | |
| }, | |
| "424": { | |
| "description": "Generation Error", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Request failed during generation" | |
| } | |
| } | |
| } | |
| }, | |
| "429": { | |
| "description": "Model is overloaded", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Model is overloaded" | |
| } | |
| } | |
| } | |
| }, | |
| "500": { | |
| "description": "Incomplete generation", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Incomplete generation" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "/generate": { | |
| "post": { | |
| "tags": [ | |
| "Text Generation Inference" | |
| ], | |
| "summary": "Generate tokens", | |
| "description": "Generate tokens", | |
| "operationId": "generate", | |
| "requestBody": { | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/GenerateRequest" | |
| } | |
| } | |
| }, | |
| "required": true | |
| }, | |
| "responses": { | |
| "200": { | |
| "description": "Generated Text", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/GenerateResponse" | |
| } | |
| } | |
| } | |
| }, | |
| "422": { | |
| "description": "Input validation error", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Input validation error" | |
| } | |
| } | |
| } | |
| }, | |
| "424": { | |
| "description": "Generation Error", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Request failed during generation" | |
| } | |
| } | |
| } | |
| }, | |
| "429": { | |
| "description": "Model is overloaded", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Model is overloaded" | |
| } | |
| } | |
| } | |
| }, | |
| "500": { | |
| "description": "Incomplete generation", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Incomplete generation" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "/generate_stream": { | |
| "post": { | |
| "tags": [ | |
| "Text Generation Inference" | |
| ], | |
| "summary": "Generate a stream of tokens using Server-Sent Events", | |
| "description": "Generate a stream of tokens using Server-Sent Events", | |
| "operationId": "generate_stream", | |
| "requestBody": { | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/GenerateRequest" | |
| } | |
| } | |
| }, | |
| "required": true | |
| }, | |
| "responses": { | |
| "200": { | |
| "description": "Generated Text", | |
| "content": { | |
| "text/event-stream": { | |
| "schema": { | |
| "$ref": "#/components/schemas/StreamResponse" | |
| } | |
| } | |
| } | |
| }, | |
| "422": { | |
| "description": "Input validation error", | |
| "content": { | |
| "text/event-stream": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Input validation error" | |
| } | |
| } | |
| } | |
| }, | |
| "424": { | |
| "description": "Generation Error", | |
| "content": { | |
| "text/event-stream": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Request failed during generation" | |
| } | |
| } | |
| } | |
| }, | |
| "429": { | |
| "description": "Model is overloaded", | |
| "content": { | |
| "text/event-stream": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Model is overloaded" | |
| } | |
| } | |
| } | |
| }, | |
| "500": { | |
| "description": "Incomplete generation", | |
| "content": { | |
| "text/event-stream": { | |
| "schema": { | |
| "$ref": "#/components/schemas/ErrorResponse" | |
| }, | |
| "example": { | |
| "error": "Incomplete generation" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "/info": { | |
| "get": { | |
| "tags": [ | |
| "Text Generation Inference" | |
| ], | |
| "summary": "Text Generation Inference endpoint info", | |
| "description": "Text Generation Inference endpoint info", | |
| "operationId": "get_model_info", | |
| "responses": { | |
| "200": { | |
| "description": "Served model info", | |
| "content": { | |
| "application/json": { | |
| "schema": { | |
| "$ref": "#/components/schemas/Info" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "/metrics": { | |
| "get": { | |
| "tags": [ | |
| "Text Generation Inference" | |
| ], | |
| "summary": "Prometheus metrics scrape endpoint", | |
| "description": "Prometheus metrics scrape endpoint", | |
| "operationId": "metrics", | |
| "responses": { | |
| "200": { | |
| "description": "Prometheus Metrics", | |
| "content": { | |
| "text/plain": { | |
| "schema": { | |
| "type": "string" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "components": { | |
| "schemas": { | |
| "BestOfSequence": { | |
| "type": "object", | |
| "required": [ | |
| "generated_text", | |
| "finish_reason", | |
| "generated_tokens", | |
| "prefill", | |
| "tokens" | |
| ], | |
| "properties": { | |
| "finish_reason": { | |
| "$ref": "#/components/schemas/FinishReason" | |
| }, | |
| "generated_text": { | |
| "type": "string", | |
| "example": "test" | |
| }, | |
| "generated_tokens": { | |
| "type": "integer", | |
| "format": "int32", | |
| "example": 1, | |
| "minimum": 0.0 | |
| }, | |
| "prefill": { | |
| "type": "array", | |
| "items": { | |
| "$ref": "#/components/schemas/PrefillToken" | |
| } | |
| }, | |
| "seed": { | |
| "type": "integer", | |
| "format": "int64", | |
| "example": 42, | |
| "nullable": true, | |
| "minimum": 0.0 | |
| }, | |
| "tokens": { | |
| "type": "array", | |
| "items": { | |
| "$ref": "#/components/schemas/Token" | |
| } | |
| } | |
| } | |
| }, | |
| "CompatGenerateRequest": { | |
| "type": "object", | |
| "required": [ | |
| "inputs" | |
| ], | |
| "properties": { | |
| "inputs": { | |
| "type": "string", | |
| "example": "My name is Olivier and I" | |
| }, | |
| "parameters": { | |
| "$ref": "#/components/schemas/GenerateParameters" | |
| }, | |
| "stream": { | |
| "type": "boolean" | |
| } | |
| } | |
| }, | |
| "Details": { | |
| "type": "object", | |
| "required": [ | |
| "finish_reason", | |
| "generated_tokens", | |
| "prefill", | |
| "tokens" | |
| ], | |
| "properties": { | |
| "best_of_sequences": { | |
| "type": "array", | |
| "items": { | |
| "$ref": "#/components/schemas/BestOfSequence" | |
| }, | |
| "nullable": true | |
| }, | |
| "finish_reason": { | |
| "$ref": "#/components/schemas/FinishReason" | |
| }, | |
| "generated_tokens": { | |
| "type": "integer", | |
| "format": "int32", | |
| "example": 1, | |
| "minimum": 0.0 | |
| }, | |
| "prefill": { | |
| "type": "array", | |
| "items": { | |
| "$ref": "#/components/schemas/PrefillToken" | |
| } | |
| }, | |
| "seed": { | |
| "type": "integer", | |
| "format": "int64", | |
| "example": 42, | |
| "nullable": true, | |
| "minimum": 0.0 | |
| }, | |
| "tokens": { | |
| "type": "array", | |
| "items": { | |
| "$ref": "#/components/schemas/Token" | |
| } | |
| } | |
| } | |
| }, | |
| "ErrorResponse": { | |
| "type": "object", | |
| "required": [ | |
| "error", | |
| "error_type" | |
| ], | |
| "properties": { | |
| "error": { | |
| "type": "string" | |
| }, | |
| "error_type": { | |
| "type": "string" | |
| } | |
| } | |
| }, | |
| "FinishReason": { | |
| "type": "string", | |
| "enum": [ | |
| "length", | |
| "eos_token", | |
| "stop_sequence" | |
| ] | |
| }, | |
| "GenerateParameters": { | |
| "type": "object", | |
| "properties": { | |
| "best_of": { | |
| "type": "integer", | |
| "default": null, | |
| "example": 1, | |
| "nullable": true, | |
| "minimum": 0.0, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "details": { | |
| "type": "boolean", | |
| "default": true | |
| }, | |
| "do_sample": { | |
| "type": "boolean", | |
| "default": false, | |
| "example": true | |
| }, | |
| "max_new_tokens": { | |
| "type": "integer", | |
| "format": "int32", | |
| "default": 20, | |
| "minimum": 0.0, | |
| "exclusiveMaximum": 512.0, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "repetition_penalty": { | |
| "type": "number", | |
| "format": "float", | |
| "default": null, | |
| "example": 1.03, | |
| "nullable": true, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "return_full_text": { | |
| "type": "boolean", | |
| "default": null, | |
| "example": false, | |
| "nullable": true | |
| }, | |
| "seed": { | |
| "type": "integer", | |
| "format": "int64", | |
| "default": null, | |
| "example": null, | |
| "nullable": true, | |
| "minimum": 0.0, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "stop": { | |
| "type": "array", | |
| "items": { | |
| "type": "string" | |
| }, | |
| "example": [ | |
| "photographer" | |
| ], | |
| "maxItems": 4 | |
| }, | |
| "temperature": { | |
| "type": "number", | |
| "format": "float", | |
| "default": null, | |
| "example": 0.5, | |
| "nullable": true, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "top_k": { | |
| "type": "integer", | |
| "format": "int32", | |
| "default": null, | |
| "example": 10, | |
| "nullable": true, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "top_p": { | |
| "type": "number", | |
| "format": "float", | |
| "default": null, | |
| "example": 0.95, | |
| "nullable": true, | |
| "maximum": 1.0, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "truncate": { | |
| "type": "integer", | |
| "default": null, | |
| "example": null, | |
| "nullable": true, | |
| "minimum": 0.0 | |
| }, | |
| "typical_p": { | |
| "type": "number", | |
| "format": "float", | |
| "default": null, | |
| "example": 0.95, | |
| "nullable": true, | |
| "maximum": 1.0, | |
| "exclusiveMinimum": 0.0 | |
| }, | |
| "watermark": { | |
| "type": "boolean", | |
| "default": false, | |
| "example": true | |
| } | |
| } | |
| }, | |
| "GenerateRequest": { | |
| "type": "object", | |
| "required": [ | |
| "inputs" | |
| ], | |
| "properties": { | |
| "inputs": { | |
| "type": "string", | |
| "example": "My name is Olivier and I" | |
| }, | |
| "parameters": { | |
| "$ref": "#/components/schemas/GenerateParameters" | |
| } | |
| } | |
| }, | |
| "GenerateResponse": { | |
| "type": "object", | |
| "required": [ | |
| "generated_text" | |
| ], | |
| "properties": { | |
| "details": { | |
| "allOf": [ | |
| { | |
| "$ref": "#/components/schemas/Details" | |
| } | |
| ], | |
| "nullable": true | |
| }, | |
| "generated_text": { | |
| "type": "string", | |
| "example": "test" | |
| } | |
| } | |
| }, | |
| "Info": { | |
| "type": "object", | |
| "required": [ | |
| "model_id", | |
| "version" | |
| ], | |
| "properties": { | |
| "model_id": { | |
| "type": "string", | |
| "example": "bigscience/bloom-560m" | |
| }, | |
| "model_pipeline_tag": { | |
| "type": "string", | |
| "example": "text-generation", | |
| "nullable": true | |
| }, | |
| "model_sha": { | |
| "type": "string", | |
| "example": "e985a63cdc139290c5f700ff1929f0b5942cced2", | |
| "nullable": true | |
| }, | |
| "sha": { | |
| "type": "string", | |
| "example": null, | |
| "nullable": true | |
| }, | |
| "version": { | |
| "type": "string", | |
| "example": "0.5.0" | |
| } | |
| } | |
| }, | |
| "PrefillToken": { | |
| "type": "object", | |
| "required": [ | |
| "id", | |
| "text", | |
| "logprob" | |
| ], | |
| "properties": { | |
| "id": { | |
| "type": "integer", | |
| "format": "int32", | |
| "example": 0, | |
| "minimum": 0.0 | |
| }, | |
| "logprob": { | |
| "type": "number", | |
| "format": "float", | |
| "example": -0.34, | |
| "nullable": true | |
| }, | |
| "text": { | |
| "type": "string", | |
| "example": "test" | |
| } | |
| } | |
| }, | |
| "StreamDetails": { | |
| "type": "object", | |
| "required": [ | |
| "finish_reason", | |
| "generated_tokens" | |
| ], | |
| "properties": { | |
| "finish_reason": { | |
| "$ref": "#/components/schemas/FinishReason" | |
| }, | |
| "generated_tokens": { | |
| "type": "integer", | |
| "format": "int32", | |
| "example": 1, | |
| "minimum": 0.0 | |
| }, | |
| "seed": { | |
| "type": "integer", | |
| "format": "int64", | |
| "example": 42, | |
| "nullable": true, | |
| "minimum": 0.0 | |
| } | |
| } | |
| }, | |
| "StreamResponse": { | |
| "type": "object", | |
| "required": [ | |
| "token" | |
| ], | |
| "properties": { | |
| "details": { | |
| "allOf": [ | |
| { | |
| "$ref": "#/components/schemas/StreamDetails" | |
| } | |
| ], | |
| "nullable": true | |
| }, | |
| "generated_text": { | |
| "type": "string", | |
| "default": null, | |
| "example": "test", | |
| "nullable": true | |
| }, | |
| "token": { | |
| "$ref": "#/components/schemas/Token" | |
| } | |
| } | |
| }, | |
| "Token": { | |
| "type": "object", | |
| "required": [ | |
| "id", | |
| "text", | |
| "logprob", | |
| "special" | |
| ], | |
| "properties": { | |
| "id": { | |
| "type": "integer", | |
| "format": "int32", | |
| "example": 0, | |
| "minimum": 0.0 | |
| }, | |
| "logprob": { | |
| "type": "number", | |
| "format": "float", | |
| "example": -0.34, | |
| "nullable": true | |
| }, | |
| "special": { | |
| "type": "boolean", | |
| "example": false | |
| }, | |
| "text": { | |
| "type": "string", | |
| "example": "test" | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "tags": [ | |
| { | |
| "name": "Text Generation Inference", | |
| "description": "Hugging Face Text Generation Inference API" | |
| } | |
| ] | |
| } |