|
psutil |
|
sentencepiece # Required for LLaMA tokenizer. |
|
numpy < 2.0.0 |
|
requests >= 2.26.0 |
|
tqdm |
|
blake3 |
|
py-cpuinfo |
|
transformers >= 4.45.2 # Required for Llama 3.2 and Qwen2-VL. |
|
tokenizers >= 0.19.1 # Required for Llama 3. |
|
protobuf # Required by LlamaTokenizer. |
|
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9' |
|
fastapi >= 0.107.0, != 0.113.*, != 0.114.0; python_version >= '3.9' |
|
aiohttp |
|
openai >= 1.52.0 # Requires a modern openai package (types module present and max_completion_tokens field supported) |
|
uvicorn[standard] |
|
pydantic >= 2.9 # Required for fastapi >= 0.113.0 |
|
prometheus_client >= 0.18.0 |
|
pillow # Required for image processing |
|
prometheus-fastapi-instrumentator >= 7.0.0 |
|
tiktoken >= 0.6.0 # Required for DBRX tokenizer |
|
lm-format-enforcer >= 0.10.9, < 0.11 |
|
outlines == 0.1.11 |
|
lark == 1.2.2 |
|
xgrammar >= 0.1.6; platform_machine == "x86_64" |
|
typing_extensions >= 4.10 |
|
filelock >= 3.16.1 # must include the change from https://github.com/tox-dev/filelock/pull/317 |
|
partial-json-parser # used for parsing partial JSON outputs |
|
pyzmq |
|
msgspec |
|
gguf == 0.10.0 |
|
importlib_metadata |
|
mistral_common[opencv] >= 1.5.0 |
|
pyyaml |
|
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas; must be at least 1.16.0 on Python 3.12+ |
|
setuptools>=74.1.1; python_version > '3.11' # setuptools is used by triton; a modern version is required on Python 3.12+ so that it does not try to import distutils, which was removed in 3.12 |
|
einops # Required for Qwen2-VL. |
|
compressed-tensors == 0.9.0 # required for compressed-tensors |
|
depyf==0.18.0 # required for profiling and debugging with compilation config |
|
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py |
|
ray[default] >= 2.9 |
|
nvidia-ml-py >= 12.560.30 # for pynvml package |