Update README.md
Browse files
README.md
CHANGED
@@ -117,6 +117,7 @@ input_tokens = tokenizer(
|
|
117 |
generated_ids = model.generate(
|
118 |
**input_tokens,
|
119 |
max_new_tokens=256,
|
|
|
120 |
)[0]
|
121 |
|
122 |
generated_response = tokenizer.decode(
|
@@ -133,17 +134,75 @@ Then our `generated_response` will look like this:
|
|
133 |
]</tool_call><|im_end|>
|
134 |
```
|
135 |
|
|
|
136 |
|
|
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
|
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
|
|
|
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
|
148 |
|
149 |
|
|
|
117 |
generated_ids = model.generate(
|
118 |
**input_tokens,
|
119 |
max_new_tokens=256,
|
120 |
+
do_sample=False,
|
121 |
)[0]
|
122 |
|
123 |
generated_response = tokenizer.decode(
|
|
|
134 |
]</tool_call><|im_end|>
|
135 |
```
|
136 |
|
137 |
+
## Usage (vLLM) <a name="usage_vllm"></a>
|
138 |
|
139 |
+
For tool calling to work correctly with online serving in vLLM, you additionally need to load [qwen2_tool_parser.py]() and [chat_template.jinja]() from this repository.
|
140 |
|
141 |
+
```
|
142 |
+
vllm serve Vikhrmodels/Qwen2.5-7B-Instruct-Tool-Planning \
|
143 |
+
--download-dir "/path/to/cache" \
|
144 |
+
--chat-template "/path/to/chat_template.jinja" \
|
145 |
+
--tool-parser-plugin "/path/to/qwen2_tool_parser.py" \
|
146 |
+
--tool-call-parser "qwen2" \
|
147 |
+
--enable-auto-tool-choice
|
148 |
+
```
|
149 |
|
150 |
+
After that, you can start making requests:
|
151 |
|
152 |
+
```python
|
153 |
+
from openai import OpenAI
|
154 |
+
import json
|
155 |
+
|
156 |
+
client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
|
157 |
+
tools = [
|
158 |
+
{
|
159 |
+
"type": "function",
|
160 |
+
"function": {
|
161 |
+
"name": "get_weather",
|
162 |
+
"description": "Get the current weather in a given location",
|
163 |
+
"parameters": {
|
164 |
+
"type": "object",
|
165 |
+
"properties": {
|
166 |
+
"location": {"type": "string", "description": "City and state."},
|
167 |
+
},
|
168 |
+
"required": ["location"]
|
169 |
+
}
|
170 |
+
}
|
171 |
+
}
|
172 |
+
]
|
173 |
|
174 |
+
response = client.chat.completions.create(
|
175 |
+
model=client.models.list().data[0].id,
|
176 |
+
messages=[
|
177 |
+
{"role": "user", "content": "What's the weather in Krasnodar and Moscow?"}
|
178 |
+
],
|
179 |
+
tools=tools,
|
180 |
+
)
|
181 |
|
182 |
+
print(response.choices[0].message)
|
183 |
+
```
|
184 |
|
185 |
+
```
|
186 |
+
ChatCompletionMessage(
|
187 |
+
content='<|start_thinking|>I need to get the weather for Krasnodar and Moscow.<|end_thinking|>',
|
188 |
+
refusal=None,
|
189 |
+
role='assistant',
|
190 |
+
audio=None,
|
191 |
+
function_call=None,
|
192 |
+
tool_calls=[
|
193 |
+
ChatCompletionMessageToolCall(
|
194 |
+
id='chatcmpl-tool-73646c73148e4af9ac53656d6aa3e3c6',
|
195 |
+
function=Function(arguments='{"location": "Krasnodar"}', name='get_weather'),
|
196 |
+
type='function'),
|
197 |
+
ChatCompletionMessageToolCall(
|
198 |
+
id='chatcmpl-tool-95d93590d1a24df6a4f44a87a83f7761',
|
199 |
+
function=Function(arguments='{"location": "Moscow"}', name='get_weather'),
|
200 |
+
type='function')
|
201 |
+
],
|
202 |
+
reasoning_content=None)
|
203 |
+
```
|
204 |
|
205 |
+
## Tool Planning Examples <a name="examples"></a>
|
|
|
|
|
206 |
|
207 |
|
208 |
|