Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -139,53 +139,73 @@ def get_license(models: list[pd.Series]) -> str:
|
|
139 |
|
140 |
|
141 |
def create_config(models: list[pd.Series]) -> str:
|
142 |
-
slerp_config =
|
143 |
-
slices:
|
144 |
-
- sources:
|
145 |
-
- model: {models[0]["Model"]}
|
146 |
-
layer_range: [0, 32]
|
147 |
-
- model: {models[1]["Model"]}
|
148 |
-
layer_range: [0, 32]
|
149 |
-
merge_method: slerp
|
150 |
-
base_model: {models[0]["Model"]}
|
151 |
-
parameters:
|
152 |
-
t:
|
153 |
-
- filter: self_attn
|
154 |
-
value: [0, 0.5, 0.3, 0.7, 1]
|
155 |
-
- filter: mlp
|
156 |
-
value: [1, 0.5, 0.7, 0.3, 0]
|
157 |
-
- value: 0.5
|
158 |
-
dtype: bfloat16
|
159 |
-
random_seed: 0
|
160 |
-
"""
|
161 |
-
dare_config = f"""
|
162 |
models:
|
163 |
-
- model:
|
164 |
-
|
165 |
-
- model:
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
- model:
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
merge_method: dare_ties
|
174 |
-
base_model:
|
175 |
parameters:
|
176 |
-
int8_mask: true
|
177 |
-
dtype:
|
178 |
-
random_seed: 0
|
179 |
-
"""
|
180 |
-
stock_config = f"""
|
181 |
-
models:
|
182 |
-
- model: mlabonne/Meta-Llama-3-8B
|
183 |
-
- model: {models[0]["Model"]}
|
184 |
-
- model: {models[1]["Model"]}
|
185 |
-
merge_method: model_stock
|
186 |
-
base_model: mlabonne/Meta-Llama-3-8B
|
187 |
-
dtype: bfloat16
|
188 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
|
190 |
|
191 |
with open('config.yaml', 'w', encoding="utf-8") as f:
|
|
|
139 |
|
140 |
|
141 |
def create_config(models: list[pd.Series]) -> str:
|
142 |
+
slerp_config = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
models:
|
144 |
+
- model: NousResearch/Meta-Llama-3-8B
|
145 |
+
# No parameters necessary for base model
|
146 |
+
- model: NousResearch/Meta-Llama-3-8B-Instruct
|
147 |
+
parameters:
|
148 |
+
density: 0.6
|
149 |
+
weight: 0.5
|
150 |
+
- model: mlabonne/OrpoLlama-3-8B
|
151 |
+
parameters:
|
152 |
+
density: 0.55
|
153 |
+
weight: 0.05
|
154 |
merge_method: dare_ties
|
155 |
+
base_model: NousResearch/Meta-Llama-3-8B
|
156 |
parameters:
|
157 |
+
int8_mask: true
|
158 |
+
dtype: float16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
"""
|
160 |
+
dare_config = slerp_config
|
161 |
+
stock_config = slerp_config
|
162 |
+
# slerp_config = f"""
|
163 |
+
# slices:
|
164 |
+
# - sources:
|
165 |
+
# - model: {models[0]["Model"]}
|
166 |
+
# layer_range: [0, 32]
|
167 |
+
# - model: {models[1]["Model"]}
|
168 |
+
# layer_range: [0, 32]
|
169 |
+
# merge_method: slerp
|
170 |
+
# base_model: {models[0]["Model"]}
|
171 |
+
# parameters:
|
172 |
+
# t:
|
173 |
+
# - filter: self_attn
|
174 |
+
# value: [0, 0.5, 0.3, 0.7, 1]
|
175 |
+
# - filter: mlp
|
176 |
+
# value: [1, 0.5, 0.7, 0.3, 0]
|
177 |
+
# - value: 0.5
|
178 |
+
# dtype: bfloat16
|
179 |
+
# random_seed: 0
|
180 |
+
# """
|
181 |
+
# dare_config = f"""
|
182 |
+
# models:
|
183 |
+
# - model: mlabonne/Meta-Llama-3-8B
|
184 |
+
# # No parameters necessary for base model
|
185 |
+
# - model: {models[0]["Model"]}
|
186 |
+
# parameters:
|
187 |
+
# density: 0.53
|
188 |
+
# weight: 0.5
|
189 |
+
# - model: {models[1]["Model"]}
|
190 |
+
# parameters:
|
191 |
+
# density: 0.53
|
192 |
+
# weight: 0.5
|
193 |
+
# merge_method: dare_ties
|
194 |
+
# base_model: mlabonne/Meta-Llama-3-8B
|
195 |
+
# parameters:
|
196 |
+
# int8_mask: true
|
197 |
+
# dtype: bfloat16
|
198 |
+
# random_seed: 0
|
199 |
+
# """
|
200 |
+
# stock_config = f"""
|
201 |
+
# models:
|
202 |
+
# - model: mlabonne/Meta-Llama-3-8B
|
203 |
+
# - model: {models[0]["Model"]}
|
204 |
+
# - model: {models[1]["Model"]}
|
205 |
+
# merge_method: model_stock
|
206 |
+
# base_model: mlabonne/Meta-Llama-3-8B
|
207 |
+
# dtype: bfloat16
|
208 |
+
# """
|
209 |
yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
|
210 |
|
211 |
with open('config.yaml', 'w', encoding="utf-8") as f:
|