# Install Mergekit and its dependencies

In [None]:
# Show the NVIDIA GPU (if any) attached to this runtime.
!nvidia-smi

In [None]:
# Install mergekit straight from its GitHub repository (-qqq keeps pip quiet).
!pip install -qqq git+https://github.com/arcee-ai/mergekit.git

# Mergekit Config

In [None]:
# @markdown What will your model's name be?
MODEL_NAME = 'SmolMoE' # @param {type:"string"}

In [None]:
# Mergekit configuration, kept as a string so it can be edited in place and
# then written verbatim to config.yaml.
mergekit_yaml = """
base_model: BEE-spoke-data/smol_llama-220M-GQA
gate_mode: random
dtype: bfloat16
experts:
  - source_model: BEE-spoke-data/smol_llama-220M-GQA
  - source_model: BEE-spoke-data/smol_llama-220M-GQA
""" # @param {type:"string"}
# Persist the configuration; the file is overwritten on every run of this cell.
with open('config.yaml', mode='w', encoding="utf-8") as config_file:
    config_file.write(mergekit_yaml)

# Mergekit Runtime

In [None]:
# Runtime options for the run cell below. The trailing "# @param" / "# @markdown"
# annotations are Colab form markers and must stay on these lines.
low_cpu_ram = True # @param {type:"boolean"}
runtime = "GPU" # @param ["CPU", "GPU"]
# Default to "moe": the default config.yaml written by this notebook uses
# `gate_mode` and `experts`, i.e. it is a Mixture-of-Experts config, so a fresh
# "Run all" should launch mergekit-moe rather than mergekit-mega.
task = "moe" # @param ["merge", "merge-mega", "moe", "extract"]
# @markdown ### Mergekit arguments

trust_remote_code = False # @param {type:"boolean"}
clone_tensors = True # @param {type:"boolean"}
low_ram = True # @param {type:"boolean"}
out_shard_size = "500M" # @param {type:"string"}

# @markdown ### Extract LoRA (experimental)
# The three values below are only read when task == "extract".
base_model = "unsloth/Llama-3.2-3B-Instruct" # @param {type:"string"}
finetuned_model = "theprint/ReWiz-Llama-3.2-3B" # @param {type:"string"}
extract_rank = 32 # @param {type:"number"}

## Run the program

In [None]:
import os
import shutil

def empty_folder(folder_path):
  """Reset an existing directory to an empty state.

  If ``folder_path`` exists it is removed together with all of its contents
  and then recreated. If it does not exist, nothing happens (the folder is
  not created).
  """
  if not os.path.exists(folder_path):
    return
  shutil.rmtree(folder_path)
  os.makedirs(folder_path)

empty_folder('merge')
empty_folder('lora')

if task == "merge":
    cli = "mergekit-yaml"
elif task == "merge-mega":
    cli = "mergekit-mega"
elif task == "moe":
    cli = "mergekit-moe"
elif task == "extract":
    if base_model == "" or finetuned_model == "":
        raise ValueError("base_model and finetuned_model cannot be empty")
    !pip install -qqq bitsandbytes
    cli = f"mergekit-extract-lora {finetuned_model} {base_model} lora --rank={extract_rank}"

if task in ["merge", "moe", "merge-mega"]:
    cli += " config.yaml merge --copy-tokenizer --allow-crimes"
    if runtime == "GPU":
        if task in ["merge", "merge-mega"]:
            cli += " --cuda"
        elif task == "moe":
            cli += " --device cuda --cuda"
    else:
        cli += " --no-cuda"

    if trust_remote_code:
        cli += " --trust-remote-code"
    if clone_tensors:
        cli += " --clone-tensors"
    if low_ram:
        cli += f" --out-shard-size {out_shard_size} --lazy-unpickle"
        if low_cpu_ram:
            cli += " --low-cpu-memory"
print(cli)
!{cli}

# Run Inference with the Model

In [None]:
# Install or upgrade the libraries used by the inference cells (-U upgrades, -q reduces pip output).
!pip install -qU transformers bitsandbytes accelerate
from transformers import AutoTokenizer, pipeline
import torch

# Local folder that the merge step writes to; it is used both as the tokenizer
# source and as the model source for the pipeline.
model = "merge"
tokenizer = AutoTokenizer.from_pretrained(model)

# Load the weights in half precision, without 4-bit quantization.
generator = pipeline(
    "text-generation",
    model=model,
    model_kwargs=dict(torch_dtype=torch.float16, load_in_4bit=False),
)

In [None]:
messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]

# Tokenizers of base (non-chat) models may not define a chat template, and
# recent transformers releases raise in apply_chat_template() when none is set.
# Fall back to the raw user message in that case so the cell still runs.
if getattr(tokenizer, "chat_template", None):
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
else:
    prompt = messages[0]["content"]

# Sampled generation; the printed text includes the prompt followed by the completion.
outputs = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])

# Upload to Hugging Face

In [None]:
# @title ## Upload model to Hugging Face { display-mode: "form" }
# @markdown Enter your HF username and the name of the Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).
username = 'username' # @param {type:"string"}
token_env = 'hf_token' # @param {type:"string"}

# Make sure the Hugging Face Hub client is installed and up to date.
!pip install -qU huggingface_hub

import os
import yaml

from huggingface_hub import HfApi
from google.colab import userdata

def output_dir():
    """Return the name of the first non-empty output folder.

    ``merge`` takes precedence over ``lora``. Paths are relative to the
    current working directory.

    Raises:
        ValueError: if neither folder exists with at least one entry.
    """
    for candidate in ("merge", "lora"):
        if os.path.exists(candidate) and os.listdir(candidate):
            return candidate
    raise ValueError("Both folders are empty or do not exist.")


# The access token is read from the Colab secret named by `token_env`
# (defined in the secrets tab in Google Colab).
api = HfApi(token=userdata.get(token_env))
repo_id = f"{username}/{MODEL_NAME}"
try:
    # Bind the result to a separate name: assigning it back to `output_dir`
    # would replace the function with a string, so any later call to
    # output_dir() in this session would fail with "'str' object is not callable".
    upload_dir = output_dir()
    # exist_ok=True lets the cell be re-run against an existing repository.
    api.create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True,
    )
    api.upload_folder(
        repo_id=repo_id,
        folder_path=upload_dir,
    )
except ValueError as e:
    # Raised by output_dir() when there is nothing to upload; report it without a traceback.
    print(e)