File size: 11,069 Bytes
ba6030d
fc17cd4
ba6030d
 
 
 
 
 
bbfe612
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc17cd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbfe612
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc17cd4
ba6030d
 
 
 
 
 
 
 
 
 
 
 
fc17cd4
 
ba6030d
 
 
 
 
 
 
 
fc17cd4
bbfe612
fc17cd4
 
ce641f7
ba6030d
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
import os
import sys
import time
import random
import yaml
import subprocess

import runpod
import gradio as gr
import pandas as pd
from jinja2 import Template
from huggingface_hub import ModelCard, ModelCardData, HfApi, repo_info
from huggingface_hub.utils import RepositoryNotFoundError

# Set environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")
runpod.api_key = os.environ.get("RUNPOD_TOKEN")

# Parameters
USERNAME = 'automerger'
N_ROWS = 20
WAIT_TIME = 3600


# Logger from https://github.com/gradio-app/gradio/issues/2362
class Logger:
    """Tee-style replacement for ``sys.stdout``.

    Every message is forwarded to the stream that was ``sys.stdout`` when the
    instance was created, and is also written to the file ``filename``.
    """

    def __init__(self, filename):
        # Capture the current stdout before anything replaces it.
        self.terminal = sys.stdout
        # "w" mode truncates an existing file; the handle stays open for the
        # lifetime of the object and is never closed explicitly.
        self.log = open(filename, "w")

    def write(self, message):
        """Write ``message`` to the terminal first, then to the log file."""
        for stream in (self.terminal, self.log):
            stream.write(message)

    def flush(self):
        """Flush the terminal stream, then the log file."""
        for stream in (self.terminal, self.log):
            stream.flush()

    def isatty(self):
        """Always report that this stream is not an interactive terminal."""
        return False


def create_dataset() -> bool:
    """
    Run the Scrape Open LLM Leaderboard script in CSV mode.

    The scraper is started as ``python3 scrape-open-llm-leaderboard/main.py
    -csv`` and its captured output is echoed with a
    ``scrape-open-llm-leaderboard:`` prefix.

    Returns:
        True when the scraper exits with status 0, False when it exits with a
        non-zero status (its stderr is printed in that case).
    """
    scraper_cmd = ["python3", "scrape-open-llm-leaderboard/main.py", "-csv"]

    try:
        completed = subprocess.run(scraper_cmd, check=True,
                                   capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f"scrape-open-llm-leaderboard: {err.stderr}")
        return False

    print(f"scrape-open-llm-leaderboard: {completed.stdout}")
    return True


def merge_models() -> bool:
    """
    Use mergekit to create a merge.

    Runs ``mergekit-yaml`` with ``config.yaml`` and ``merge`` as positional
    arguments and echoes the captured output with a ``mergekit:`` prefix.

    Returns:
        True when mergekit exits with status 0, False when it exits with a
        non-zero status (its stderr is printed in that case). Reporting the
        outcome mirrors ``create_dataset`` and lets callers avoid publishing
        the output of a failed merge; callers that ignore the return value
        behave exactly as before.
    """
    command = ["mergekit-yaml", "config.yaml", "merge", "--copy-tokenizer", "--allow-crimes", "--out-shard-size", "1B", "--lazy-unpickle"]

    try:
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, text=True)
    except subprocess.CalledProcessError as e:
        print(f"mergekit: {e.stderr}")
        return False

    print(f"mergekit: {result.stdout}")
    return True


def make_df(file_path: str, n_rows: int) -> pd.DataFrame:
    """
    Build the candidate-model table from an Open LLM Leaderboard CSV export.

    Rows are kept only when the model is listed with 7.24B parameters, is
    available on the hub, is neither flagged nor a MoE, and has "Original"
    weights. Bookkeeping columns are dropped, duplicate "Model" entries are
    removed (first occurrence wins), and the first ``n_rows`` rows in file
    order are returned.
    """
    leaderboard = pd.read_csv(file_path)

    # One boolean mask combining every eligibility criterion.
    eligible = (
        leaderboard["#Params (B)"].eq(7.24)
        & leaderboard["Available on the hub"].eq(True)
        & leaderboard["Flagged"].eq(False)
        & leaderboard["MoE"].eq(False)
        & leaderboard["Weight type"].eq("Original")
    )

    # Columns that are not needed once the filtering is done.
    unused_columns = [
        "Available on the hub", "Model sha", "T", "Type", "Precision",
        "Architecture", "Weight type", "Hub ❤️", "Flagged", "MoE",
    ]

    candidates = leaderboard[eligible].drop(columns=unused_columns)
    unique_candidates = candidates.drop_duplicates(subset=["Model"])
    return unique_candidates.iloc[:n_rows]


def repo_exists(repo_id: str) -> bool:
    """
    Tell whether ``repo_id`` can be looked up with ``repo_info``.

    Returns False only when the lookup raises ``RepositoryNotFoundError``;
    any other exception propagates to the caller.
    """
    try:
        repo_info(repo_id)
    except RepositoryNotFoundError:
        return False
    else:
        return True


def get_name(models: list[pd.Series], username: str, version=0) -> str:
    """
    Pick a repository name for the merge that is not yet taken by ``username``.

    The base name joins the capitalized first dash-separated token of each
    parent's repo name (the part after the last "/"). Version 0 (or lower)
    yields ``<Base>-7B``; version N > 0 yields ``<Base>-vN-7B``.

    Args:
        models: The two parent models; each must expose a "Model" entry.
        username: Account under which the candidate name is checked.
        version: Version number to start from.

    Returns:
        The first candidate name for which ``repo_exists`` is False.
    """
    base_name = "".join(
        model["Model"].split("/")[-1].split("-")[0].capitalize()
        for model in models[:2]
    )

    # The original recursion discarded the result of the recursive call, so
    # an already-taken name was returned anyway. Iterating until a free name
    # is found returns the bumped version instead.
    while True:
        if version > 0:
            model_name = f"{base_name}-v{version}-7B"
        else:
            model_name = f"{base_name}-7B"

        if not repo_exists(f"{username}/{model_name}"):
            return model_name
        version += 1


def get_license(models: list[pd.Series]) -> str:
    """
    Choose the license for a merge from its two parents' "Hub License" values.

    Precedence: "cc-by-nc-4.0" if either parent has it, otherwise "apache-2.0"
    if either parent has it, otherwise "MIT" only when both parents are "MIT".
    Every other combination falls back to "cc-by-nc-4.0".
    """
    # NOTE(review): the comparison is case-sensitive ("MIT"); confirm the
    # spelling used in the leaderboard's "Hub License" column.
    parent_licenses = (models[0]["Hub License"], models[1]["Hub License"])

    if "cc-by-nc-4.0" in parent_licenses:
        return "cc-by-nc-4.0"
    if "apache-2.0" in parent_licenses:
        return "apache-2.0"
    if parent_licenses == ("MIT", "MIT"):
        return "MIT"
    return "cc-by-nc-4.0"


def create_config(models: list[pd.Series]) -> str:
    """
    Build a mergekit YAML configuration for two models and save it.

    Two templates are filled in with the parents' "Model" ids, the first
    parent acting as ``base_model`` in both: a SLERP merge over layers 0-32
    and a DARE-TIES merge. One of them is drawn at random (SLERP with weight
    0.4, DARE-TIES with weight 0.6), written to ``config.yaml`` in the current
    directory, and returned.

    Args:
        models: The two parent models; each must expose a "Model" entry.

    Returns:
        The text of the selected configuration.
    """
    slerp_config = f"""
slices:
  - sources:
      - model: {models[0]["Model"]}
        layer_range: [0, 32]
      - model: {models[1]["Model"]}
        layer_range: [0, 32]
merge_method: slerp
base_model: {models[0]["Model"]}
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
random_seed: 0
    """
    # `int8_mask` must be nested under `parameters`. Left unindented, it makes
    # `parameters` parse as null and `int8_mask` a stray top-level key.
    dare_config = f"""
models:
- model: {models[0]["Model"]}
  # No parameters necessary for base model
- model: {models[1]["Model"]}
  parameters:
    density: 0.53
    weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
    """
    yaml_config = random.choices([slerp_config, dare_config], weights=[0.4, 0.6], k=1)[0]

    with open('config.yaml', 'w', encoding="utf-8") as f:
        f.write(yaml_config)

    return yaml_config


def create_model_card(yaml_config: str, model_name: str, username: str, license: str) -> None:
    """
    Render the model card for a merge and save it to ``merge/README.md``.

    Args:
        yaml_config: The mergekit configuration text; it is parsed to find the
            merged models and embedded in the card.
        model_name: Name of the merged model, without the account prefix.
        username: Account used to build the ``username/model_name`` id in the
            usage example.
        license: License identifier written to the card metadata.

    Raises:
        ValueError: If the configuration has none of the "models",
            "parameters" or "slices" keys (including an empty configuration).
    """
    template_text = """
---
license: {{ license }}
base_model:
{%- for model in models %}
  - {{ model }}
{%- endfor %}
tags:
- merge
- mergekit
- lazymergekit
---

# {{ model_name }}

{{ model_name }} is an automated merge created by [Maxime Labonne](https://huggingface.co/mlabonne) using the following configuration.

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }})
{%- endfor %}

## 🧩 Configuration

```yaml
{{ yaml_config }}
```

## 💻 Usage

```python
!pip install -qU transformers accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "{{ username }}/{{ model_name }}"
messages = [{"role": "user", "content": "What is a large language model?"}]

tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```
    """

    # Create a Jinja template object
    jinja_template = Template(template_text.strip())

    # Get list of models from config. An empty document parses to None, so
    # fall back to an empty mapping and let the error branch below report it.
    data = yaml.safe_load(yaml_config) or {}
    if "models" in data:
        # Entries without "parameters" (the base model) are left out.
        models = [entry["model"] for entry in data["models"] if "parameters" in entry]
    elif "parameters" in data:
        # Every source of the first slice.
        models = [source["model"] for source in data["slices"][0]["sources"]]
    elif "slices" in data:
        # First source of each slice.
        models = [merge_slice["sources"][0]["model"] for merge_slice in data["slices"]]
    else:
        raise ValueError("No models or slices found in yaml config")

    # Fill the template. The configuration is stripped and placed on its own
    # lines: embedding it verbatim would carry over its trailing indentation
    # and leave the closing ``` fence indented by four spaces, which Markdown
    # does not recognise as the end of the code block.
    content = jinja_template.render(
        model_name=model_name,
        models=models,
        yaml_config=yaml_config.strip(),
        username=username,
        license=license
    )

    # Save the model card
    card = ModelCard(content)
    card.save('merge/README.md')


def upload_model(api: HfApi, username: str, model_name: str) -> None:
    """
    Publish the local ``merge`` folder as the model repo ``username/model_name``.

    The repository is created first (an already existing one is accepted via
    ``exist_ok=True``), then the folder is uploaded to it.
    """
    target_repo = f"{username}/{model_name}"

    api.create_repo(repo_id=target_repo, repo_type="model", exist_ok=True)
    api.upload_folder(repo_id=target_repo, folder_path="merge")


def create_pod(model_name: str, username: str, n=10, wait_seconds=10):
    """
    Start a RunPod pod that evaluates ``username/model_name``, with retries.

    Args:
        model_name: Name of the merged model, without the account prefix.
        username: Account that owns the model; combined into ``MODEL_ID``.
        n: Maximum number of creation attempts.
        wait_seconds: Pause between two failed attempts.

    Returns:
        The value returned by ``runpod.create_pod`` on the first success.

    Raises:
        KeyError: If the ``GITHUB_TOKEN`` environment variable is not set.
        Exception: The error from the last attempt once all ``n`` have failed.
    """
    # Read the token once, before the retry loop: a missing GITHUB_TOKEN is a
    # configuration error that retrying cannot fix, so fail immediately
    # instead of sleeping through every attempt first.
    github_token = os.environ["GITHUB_TOKEN"]

    for attempt in range(n):
        try:
            pod = runpod.create_pod(
                name=f"Automerge {model_name} on Nous",
                image_name="runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel-ubuntu22.04",
                gpu_type_id="NVIDIA GeForce RTX 3090",
                cloud_type="COMMUNITY",
                gpu_count=1,
                volume_in_gb=0,
                container_disk_in_gb=50,
                template_id="au6nz6emhk",
                env={
                    "BENCHMARK": "nous",
                    "MODEL_ID": f"{username}/{model_name}",
                    "REPO": "https://github.com/mlabonne/llm-autoeval.git",
                    "TRUST_REMOTE_CODE": False,
                    "DEBUG": False,
                    "GITHUB_API_TOKEN": github_token,
                }
            )
            print("Pod creation succeeded.")
            return pod
        except Exception as e:
            # Any failure of the creation call is treated as retryable.
            print(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < n - 1:
                print(f"Waiting {wait_seconds} seconds before retrying...")
                time.sleep(wait_seconds)
            else:
                print("All attempts failed. Giving up.")
                raise

def merge_loop():
    """
    Run one AutoMerger iteration.

    Steps: refresh the leaderboard CSV, filter it, sample two models, derive
    a name and a license, write the merge config, run the merge, write the
    model card, upload the result, and start an evaluation pod. The iteration
    stops early (after printing a message) when the dataset cannot be
    created, when fewer than two candidate models are available, or when the
    merge step reports failure.
    """
    # Start HF API
    api = HfApi(token=HF_TOKEN)

    # Create dataset (proceed only if successful)
    if not create_dataset():
        print("Failed to create dataset. Skipping merge loop.")
        return

    df = make_df("open-llm-leaderboard.csv", N_ROWS)

    # Sampling two rows from a shorter table raises ValueError, so bail out
    # cleanly when the filters leave too few candidates.
    if len(df) < 2:
        print("Fewer than two candidate models found. Skipping merge loop.")
        return

    # Sample two models
    sample = df.sample(n=2)
    models = [sample.iloc[i] for i in range(2)]

    # Get model name
    model_name = get_name(models, USERNAME, version=0)
    print(model_name)

    # Get model license
    license = get_license(models)
    print(license)

    # Merge configs
    yaml_config = create_config(models)
    print(yaml_config)

    # Merge models. Only an explicit False counts as failure, so a merge step
    # that reports no status (returns None) still proceeds as before.
    if merge_models() is False:
        print("Merge failed. Skipping upload and evaluation.")
        return

    # Create model card
    create_model_card(yaml_config, model_name, USERNAME, license)

    # Upload model
    upload_model(api, USERNAME, model_name)

    # Evaluate model on Runpod
    create_pod(model_name, USERNAME)

# Install scrape-open-llm-leaderboard and mergekit
# The steps run in order with check=True, so a failing step raises
# CalledProcessError and stops the script before the next one starts.
for command in (
    ["git", "clone", "-q", "https://github.com/Weyaxi/scrape-open-llm-leaderboard"],
    ["pip", "install", "-r", "scrape-open-llm-leaderboard/requirements.txt"],
    ["git", "clone", "https://github.com/arcee-ai/mergekit.git"],
    ["pip", "install", "-e", "mergekit"],
):
    subprocess.run(command, check=True, capture_output=True, text=True)

def read_logs() -> str:
    """
    Return everything written to ``output.log`` so far.

    Used as the polling callback of the Gradio textbox below. Stdout is
    flushed first so that buffered output reaches the file before it is read.
    """
    sys.stdout.flush()
    with open("output.log", "r") as f:
        return f.read()


# From here on, everything printed is mirrored into output.log.
sys.stdout = Logger("output.log")

# Gradio interface
title = """
<div align="center">
  <p style="font-size: 36px;">♾️ AutoMerger</p>
  <p style="font-size: 20px;">📝 <a href="https://medium.com/towards-data-science/merge-large-language-models-with-mergekit-2118fb392b54">Model merging</a> • 💻 <a href="https://github.com/arcee-ai/mergekit">Mergekit</a> • 🐦 <a href="https://twitter.com/maximelabonne">Follow me on X</a></p>
  <p><em>AutoMerger selects two 7B models on top of the Open LLM Leaderboard, combine them with a merge technique, and evaluate the resulting model.</em></p>
</div>
"""
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown(title)
    logs = gr.Textbox()
    # Refresh the textbox from the log file every second.
    demo.load(read_logs, None, logs, every=1)
# prevent_thread_lock=True lets the script continue past launch().
demo.launch(server_name="0.0.0.0", prevent_thread_lock=True)

print("Start AutoMerger...")

# Main loop
# Runs forever: one merge iteration, then a pause of WAIT_TIME seconds.
while True:
    try:
        merge_loop()
    except Exception as exc:
        # Top-level boundary of a long-running service: report the failure
        # (it is printed, so it also reaches the log shown in the UI) and try
        # again after the usual wait instead of ending the loop for good.
        print(f"Merge loop failed: {exc!r}")
    time.sleep(WAIT_TIME)