tpircsc committed on
Commit 694a8cd · verified · 1 Parent(s): 2e22c22

tpircsc/phi-4-mini-it-thinking-function_calling-V0

README.md CHANGED
@@ -1,4 +1,5 @@
 ---
+base_model: microsoft/Phi-4-mini-instruct
 library_name: transformers
 model_name: phi-4-mini-it-thinking-function_calling-V0
 tags:
@@ -6,17 +7,11 @@ tags:
 - trl
 - sft
 licence: license
-datasets:
-- Jofthomas/hermes-function-calling-thinking-V1
-pipeline_tag: text-generation
 ---

 # Model Card for phi-4-mini-it-thinking-function_calling-V0

 This model is a fine-tuned version of [microsoft/Phi-4-mini-instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct).
-
-It uses a modified subset of the [NousResearch/hermes-function-calling-v1](https://huggingface.co/datasets/NousResearch/hermes-function-calling-v1) dataset: [Jofthomas/hermes-function-calling-thinking-V1](https://huggingface.co/datasets/Jofthomas/hermes-function-calling-thinking-V1), which adds a thinking step to the base dataset.
-
 It has been trained using [TRL](https://github.com/huggingface/trl).

 ## Quick start
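
The Quick start section itself is unchanged and truncated in this diff. For context, a minimal sketch of loading this repo and generating; the repo id comes from this commit, `peft` is assumed to be installed (the repo stores a LoRA adapter, so transformers needs it to resolve the base model), and the sampling settings are illustrative:

```python
# Minimal sketch: load the adapter repo and run one chat turn.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="tpircsc/phi-4-mini-it-thinking-function_calling-V0",
)
messages = [{"role": "user", "content": "What's the weather in Paris?"}]
out = generator(messages, max_new_tokens=256)
print(out[0]["generated_text"])
```
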
adapter_config.json CHANGED
@@ -12,7 +12,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 64,
+  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
@@ -23,15 +23,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "k_proj",
-    "down_proj",
-    "up_proj",
+    "qkv_proj",
     "o_proj",
-    "v_proj",
-    "embed_tokens",
-    "q_proj",
-    "lm_head"
+    "down_proj",
+    "gate_up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfc16a3056ec1f0655fddd8da6baeabe5da3a9c5ee86b1626661573ee7d52858
-size 2519746832
+oid sha256:a620777428885e25225eccc07d8caf21da2d08c3d0d6d4d7c4b11b4898920a86
+size 2550290272
added_tokens.json CHANGED
@@ -1,14 +1,6 @@
 {
-  "</think>": 200033,
-  "</tool_call>": 200035,
-  "</tool_reponse>": 200037,
-  "</tools>": 200031,
-  "<eos>": 200038,
-  "<pad>": 200029,
-  "<think>": 200032,
-  "<tool_call>": 200034,
-  "<tool_reponse>": 200036,
-  "<tools>": 200030,
+  "</think>": 200030,
+  "<think>": 200029,
   "<|/tool_call|>": 200026,
   "<|/tool|>": 200024,
   "<|assistant|>": 200019,
runs/Mar11_17-21-08_73fdee6cedcf/events.out.tfevents.1741713685.73fdee6cedcf.803.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:633e04ac6a416d8a39617ab78df3ce17b1c0d167072f083a09b796cdad1158ff
+size 11917
special_tokens_map.json CHANGED
@@ -1,15 +1,7 @@
 {
   "additional_special_tokens": [
-    "<tools>",
-    "</tools>",
     "<think>",
-    "</think>",
-    "<tool_call>",
-    "</tool_call>",
-    "<tool_reponse>",
-    "</tool_reponse>",
-    "<pad>",
-    "<eos>"
+    "</think>"
   ],
   "bos_token": {
     "content": "<|endoftext|>",
@@ -18,8 +10,20 @@
     "rstrip": false,
     "single_word": false
   },
-  "eos_token": "<eos>",
-  "pad_token": "<pad>",
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3ba28fcbeb095731859469f3fb1419742a2fea50d727f5352ffc4505c0e6dde
-size 15525957
+oid sha256:c5c2a1454e12b996fb8e3def8c3770f1ab45e4a38fede20471d0e557e6de8dc7
+size 15524464
tokenizer_config.json CHANGED
@@ -100,30 +100,6 @@
     "special": true
   },
   "200029": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200030": {
-    "content": "<tools>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200031": {
-    "content": "</tools>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200032": {
     "content": "<think>",
     "lstrip": false,
     "normalized": false,
@@ -131,74 +107,26 @@
     "single_word": false,
     "special": true
   },
-  "200033": {
+  "200030": {
     "content": "</think>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false,
     "special": true
-  },
-  "200034": {
-    "content": "<tool_call>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200035": {
-    "content": "</tool_call>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200036": {
-    "content": "<tool_reponse>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200037": {
-    "content": "</tool_reponse>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
-  },
-  "200038": {
-    "content": "<eos>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false,
-    "special": true
   }
   },
   "additional_special_tokens": [
-    "<tools>",
-    "</tools>",
     "<think>",
-    "</think>",
-    "<tool_call>",
-    "</tool_call>",
-    "<tool_reponse>",
-    "</tool_reponse>",
-    "<pad>",
-    "<eos>"
+    "</think>"
   ],
   "bos_token": "<|endoftext|>",
-  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{{ '<start_of_turn>' + message['role'] + '\n' + message['content'] | trim + '<end_of_turn><eos>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<eos>",
+  "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
   "model_max_length": 131072,
-  "pad_token": "<pad>",
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }
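
The old `chat_template` was a Gemma-style template (`<start_of_turn>`/`<end_of_turn>`), which does not match Phi-4's prompt format; the new one emits `<|role|>…<|end|>` turns and wraps a system message's `tools` field in `<|tool|>…<|/tool|>`. A sketch of how the new template renders (the tool JSON is illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "tpircsc/phi-4-mini-it-thinking-function_calling-V0"
)

messages = [
    {
        "role": "system",
        "content": "You are a function-calling assistant.",
        "tools": '[{"name": "get_weather", "parameters": {"location": "string"}}]',
    },
    {"role": "user", "content": "What is the weather in Paris?"},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Expected shape (single line, wrapped here for readability):
#   <|system|>You are a function-calling assistant.<|tool|>[...]<|/tool|><|end|>
#   <|user|>What is the weather in Paris?<|end|><|assistant|>
```
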
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f88d8e1e94b68ee14fd1ac639f1e3c2d9831ab942483f1651b812349959da896
+oid sha256:c887b52524c824bc3b3257e60c6b44085b290550950629322dc7b3a7d12dc069
 size 5624