mlabonne committed
Commit 9bd7fab · verified · 1 Parent(s): 72edd22

Update app.py

Files changed (1)
  1. app.py +63 -43
app.py CHANGED
@@ -139,53 +139,73 @@ def get_license(models: list[pd.Series]) -> str:
 
 
 def create_config(models: list[pd.Series]) -> str:
-    slerp_config = f"""
-slices:
-  - sources:
-      - model: {models[0]["Model"]}
-        layer_range: [0, 32]
-      - model: {models[1]["Model"]}
-        layer_range: [0, 32]
-merge_method: slerp
-base_model: {models[0]["Model"]}
-parameters:
-  t:
-    - filter: self_attn
-      value: [0, 0.5, 0.3, 0.7, 1]
-    - filter: mlp
-      value: [1, 0.5, 0.7, 0.3, 0]
-    - value: 0.5
-dtype: bfloat16
-random_seed: 0
-"""
-    dare_config = f"""
+    slerp_config = """
 models:
-  - model: mlabonne/Meta-Llama-3-8B
-    # No parameters necessary for base model
-  - model: {models[0]["Model"]}
-    parameters:
-      density: 0.53
-      weight: 0.5
-  - model: {models[1]["Model"]}
-    parameters:
-      density: 0.53
-      weight: 0.5
+  - model: NousResearch/Meta-Llama-3-8B
+    # No parameters necessary for base model
+  - model: NousResearch/Meta-Llama-3-8B-Instruct
+    parameters:
+      density: 0.6
+      weight: 0.5
+  - model: mlabonne/OrpoLlama-3-8B
+    parameters:
+      density: 0.55
+      weight: 0.05
 merge_method: dare_ties
-base_model: mlabonne/Meta-Llama-3-8B
+base_model: NousResearch/Meta-Llama-3-8B
 parameters:
-  int8_mask: true
-dtype: bfloat16
-random_seed: 0
-"""
-    stock_config = f"""
-models:
-  - model: mlabonne/Meta-Llama-3-8B
-  - model: {models[0]["Model"]}
-  - model: {models[1]["Model"]}
-merge_method: model_stock
-base_model: mlabonne/Meta-Llama-3-8B
-dtype: bfloat16
+  int8_mask: true
+dtype: float16
 """
+    dare_config = slerp_config
+    stock_config = slerp_config
+    # slerp_config = f"""
+    # slices:
+    #   - sources:
+    #       - model: {models[0]["Model"]}
+    #         layer_range: [0, 32]
+    #       - model: {models[1]["Model"]}
+    #         layer_range: [0, 32]
+    # merge_method: slerp
+    # base_model: {models[0]["Model"]}
+    # parameters:
+    #   t:
+    #     - filter: self_attn
+    #       value: [0, 0.5, 0.3, 0.7, 1]
+    #     - filter: mlp
+    #       value: [1, 0.5, 0.7, 0.3, 0]
+    #     - value: 0.5
+    # dtype: bfloat16
+    # random_seed: 0
+    # """
+    # dare_config = f"""
+    # models:
+    #   - model: mlabonne/Meta-Llama-3-8B
+    #     # No parameters necessary for base model
+    #   - model: {models[0]["Model"]}
+    #     parameters:
+    #       density: 0.53
+    #       weight: 0.5
+    #   - model: {models[1]["Model"]}
+    #     parameters:
+    #       density: 0.53
+    #       weight: 0.5
+    # merge_method: dare_ties
+    # base_model: mlabonne/Meta-Llama-3-8B
+    # parameters:
+    #   int8_mask: true
+    # dtype: bfloat16
+    # random_seed: 0
+    # """
+    # stock_config = f"""
+    # models:
+    #   - model: mlabonne/Meta-Llama-3-8B
+    #   - model: {models[0]["Model"]}
+    #   - model: {models[1]["Model"]}
+    # merge_method: model_stock
+    # base_model: mlabonne/Meta-Llama-3-8B
+    # dtype: bfloat16
+    # """
     yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
 
     with open('config.yaml', 'w', encoding="utf-8") as f:
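
Note on the change: since the commit also sets dare_config = slerp_config and stock_config = slerp_config, the weighted random.choices call always returns the same hard-coded DARE-TIES YAML, and config.yaml is deterministic. A minimal sanity-check sketch (mine, not part of the commit) that loads the file create_config writes and confirms it matches the config above; the yaml import (PyYAML) is an assumed dependency, not something app.py itself imports:

import yaml  # PyYAML; assumed available, not imported by app.py

# Load the config.yaml written by create_config and verify it is the
# hard-coded DARE-TIES merge this commit introduces.
with open("config.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

assert cfg["merge_method"] == "dare_ties"
assert cfg["base_model"] == "NousResearch/Meta-Llama-3-8B"
assert [m["model"] for m in cfg["models"]] == [
    "NousResearch/Meta-Llama-3-8B",
    "NousResearch/Meta-Llama-3-8B-Instruct",
    "mlabonne/OrpoLlama-3-8B",
]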
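For context, the YAML above is a mergekit merge configuration, so one plausible way to consume the file create_config writes is the mergekit-yaml CLI. A hedged usage sketch: it assumes mergekit is installed (pip install mergekit), and the "merge" output directory name is my own choice, not anything app.py defines:

import subprocess

# Run the merge described by config.yaml with the mergekit-yaml CLI
# (assumed installed); writes the merged model into ./merge.
subprocess.run(
    ["mergekit-yaml", "config.yaml", "merge", "--copy-tokenizer"],
    check=True,
)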