Update README.md
Browse files
README.md
CHANGED
@@ -45,6 +45,7 @@ python -m vllm.entrypoints.openai.api_server \
|
|
45 |
--model teknium/OpenHermes-2.5-Mistral-7B \
|
46 |
--gpu-memory-utilization 0.9 \ # can go as low as 0.83-0.85 if you need to leave a little more GPU memory for your application
|
47 |
--max-model-len 16000 # 32000 if you can run it. This works on an RTX 4090
|
|
|
48 |
```
|
49 |
|
50 |
## Gradio chatbot interface for your endpoint
|
|
|
45 |
--model teknium/OpenHermes-2.5-Mistral-7B \
|
46 |
--gpu-memory-utilization 0.9 \ # can go as low as 0.83-0.85 if you need to leave a little more GPU memory for your application
|
47 |
--max-model-len 16000 # 32000 if you can run it. This works on an RTX 4090
|
48 |
+
--chat-template ./examples/template_chatml.jinja
|
49 |
```
|
50 |
|
51 |
## Gradio chatbot interface for your endpoint
|