File size: 3,408 Bytes
20aa964
 
 
 
 
 
 
a360f5e
20aa964
 
 
 
 
 
 
4b54665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44fe74d
3e2702a
 
4b54665
3e2702a
a360f5e
 
20aa964
a360f5e
20aa964
a360f5e
 
 
 
 
 
20aa964
a360f5e
 
 
 
 
 
 
20aa964
a360f5e
 
 
 
4b54665
 
20aa964
 
44fe74d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20aa964
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import shutil
import subprocess
import signal
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import HfApi
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler

# Base URL of the Hugging Face Hub website.
HF_PATH = "https://huggingface.co/"

# Conversation-template names offered in the UI dropdown. The selected name is
# passed verbatim as the `--conv-template` argument of `mlc_llm gen_config`,
# so the spelling of every entry matters. The order is the dropdown order.
CONV_TEMPLATES = [
    "llama-3",
    "llama-3_1",
    "chatml",
    "chatml_nosystem",
    "qwen2",
    "open_hermes_mistral",
    "neural_hermes_mistral",
    "llama_default",
    "llama-2",
    "mistral_default",
    "gpt2",
    "codellama_completion",
    "codellama_instruct",
    "vicuna_v1.1",
    "conv_one_shot",
    "redpajama_chat",
    "rwkv_world",
    "rwkv",
    "gorilla",
    "gorilla-openfunctions-v2",
    "guanaco",
    "dolly",
    "oasst",
    "stablelm",
    "stablecode_completion",
    "stablecode_instruct",
    "minigpt",
    "moss",
    "LM",
    "stablelm-3b",
    "gpt_bigcode",
    "wizardlm_7b",
    "wizard_coder_or_math",
    "glm",
    "custom",  # for web-llm only
    "phi-2",
    "phi-3",
    "phi-3-vision",
    "stablelm-2",
    "gemma_instruction",
    "orion",
    "llava",
    "hermes2_pro_llama3",
    "hermes3_llama-3_1",
    "tinyllama_v1_0",
    "aya-23",
]

# Quantization mode names offered in the UI dropdown. The selected name is
# passed verbatim as the `--quantization` argument of `mlc_llm convert_weight`
# and `mlc_llm gen_config`, and is also embedded in the output model name.
QUANTIZATIONS = [
    "q0f16",
    "q0f32",
    "q3f16_1",
    "q4f16_1",
    "q4f32_1",
    "q4f16_awq",
]

def button_click(hf_model_id, conv_template, quantization, oauth_token: gr.OAuthToken | None):
    """Convert a Hugging Face model to MLC format and upload it to the user's account.

    The model snapshot is downloaded into ``./dist/models/<model>``, converted
    with ``mlc_llm convert_weight`` and ``mlc_llm gen_config`` into
    ``dist/<model>-<quantization>-MLC``, and that folder is uploaded to a new
    private model repo named ``<user>/<model>-<quantization>-MLC``.

    Args:
        hf_model_id: Source repo ID in ``owner/model-name`` form.
        conv_template: Conversation template name for ``--conv-template``.
        quantization: Quantization mode name for ``--quantization``.
        oauth_token: OAuth token of the logged-in user, or ``None`` when the
            user is not logged in.

    Returns:
        The string ``"successful"`` once the upload has finished.

    Raises:
        ValueError: If the user is not logged in, the model ID is not of the
            form ``owner/model-name``, or a dropdown value is missing.
        subprocess.CalledProcessError: If an ``mlc_llm`` command exits with a
            non-zero status.
    """
    # oauth_token may be None (see the annotation), so check before touching .token.
    if oauth_token is None or not oauth_token.token:
        raise ValueError("Log in to Huggingface to use this")

    # Validate everything up front so bad input fails before the (large) download.
    hf_model_id = (hf_model_id or "").strip()
    id_parts = hf_model_id.split("/")
    if len(id_parts) != 2 or not all(id_parts) or id_parts[1] in {".", ".."}:
        raise ValueError("HF Model ID must look like 'owner/model-name'")
    if not conv_template or not quantization:
        raise ValueError("Select a conversation template and a quantization method")

    api = HfApi(token=oauth_token.token)
    model_dir_name = id_parts[1]
    mlc_model_name = f"{model_dir_name}-{quantization}-MLC"
    source_dir = f"./dist/models/{model_dir_name}"
    output_dir = f"dist/{mlc_model_name}"

    # NOTE(review): every request shares the same ./dist working directory and
    # removes all of it when done; concurrent conversions would interfere.
    try:
        os.makedirs("dist/models", exist_ok=True)

        # Best-effort, as in the original: a missing or failing git-lfs must
        # not abort the conversion.
        if shutil.which("git"):
            subprocess.run(["git", "lfs", "install"], check=False)

        api.snapshot_download(repo_id=hf_model_id, local_dir=source_dir)

        # Argument lists (no shell) keep user-supplied values from being
        # interpreted as shell syntax; check=True stops on a failed step
        # instead of uploading a broken or empty folder.
        subprocess.run(
            [
                "mlc_llm", "convert_weight", f"{source_dir}/",
                "--quantization", quantization,
                "-o", output_dir,
            ],
            check=True,
        )
        subprocess.run(
            [
                "mlc_llm", "gen_config", f"{source_dir}/",
                "--quantization", quantization,
                "--conv-template", conv_template,
                "-o", f"{output_dir}/",
            ],
            check=True,
        )

        # push to HF
        user_name = api.whoami()["name"]
        target_repo = f"{user_name}/{mlc_model_name}"
        api.create_repo(repo_id=target_repo, private=True)
        api.upload_large_folder(
            folder_path=f"./dist/{mlc_model_name}",
            repo_id=target_repo,
            repo_type="model",
        )
    finally:
        # Always reclaim disk space, including when a step above failed.
        shutil.rmtree("dist", ignore_errors=True)

    return "successful"

# demo = gr.Interface(
#     fn=button_click,
#     inputs = [gr.LoginButton(),
#               gr.Textbox(label="HF Model ID"),
#               gr.Dropdown(CONV_TEMPLATES, label="Conversation Template"),
#               gr.Dropdown(QUANTIZATIONS, label="Quantization Method")],
#     outputs = "text"
# )

# Build the web UI: a login button, the three conversion inputs, a trigger
# button and a text box that shows the value returned by `button_click`.
with gr.Blocks() as demo:
    gr.LoginButton()

    model_id_box = gr.Textbox(label="HF Model ID")
    template_choice = gr.Dropdown(choices=CONV_TEMPLATES, label="Conversation Template")
    quant_choice = gr.Dropdown(choices=QUANTIZATIONS, label="Quantization Method")

    convert_btn = gr.Button(value="Convert!")
    status_box = gr.Textbox()

    # Only the three visible inputs are wired here; `button_click`'s
    # `oauth_token` parameter is not listed (presumably Gradio supplies it
    # from the login session via the `gr.OAuthToken` annotation — confirm
    # against the Gradio OAuth docs).
    convert_btn.click(
        fn=button_click,
        inputs=[model_id_box, template_choice, quant_choice],
        outputs=status_box,
    )

# Start the app as soon as the module is executed.
demo.launch()