Hjgugugjhuhjggg commited on
Commit
011301a
·
verified ·
1 Parent(s): 9235353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -56
app.py CHANGED
@@ -13,55 +13,25 @@ import torch
13
  import yaml
14
  from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
15
  from mergekit.config import MergeConfiguration
16
-
17
  from clean_community_org import garbage_collect_empty_models
18
 
19
  has_gpu = torch.cuda.is_available()
20
 
21
- # Running directly from Python doesn't work well with Gradio+run_process because of:
22
- # Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
23
- # Let's use the CLI instead.
24
- #
25
- # import mergekit.merge
26
- # from mergekit.common import parse_kmb
27
- # from mergekit.options import MergeOptions
28
- #
29
- # merge_options = (
30
- # MergeOptions(
31
- # copy_tokenizer=True,
32
- # cuda=True,
33
- # low_cpu_memory=True,
34
- # write_model_card=True,
35
- # )
36
- # if has_gpu
37
- # else MergeOptions(
38
- # allow_crimes=True,
39
- # out_shard_size=parse_kmb("1B"),
40
- # lazy_unpickle=True,
41
- # write_model_card=True,
42
- # )
43
- # )
44
-
45
  cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
46
  " --cuda --low-cpu-memory --allow-crimes" if has_gpu else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
47
  )
48
 
49
  MARKDOWN_DESCRIPTION = """
50
  # mergekit-gui
51
-
52
  The fastest way to perform a model merge 🔥
53
-
54
  Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
55
  """
56
 
57
  MARKDOWN_ARTICLE = """
58
  ___
59
-
60
  ## Merge Configuration
61
-
62
  [Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
63
  Below are the primary elements of a configuration file:
64
-
65
  - `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
66
  - `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
67
  - `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
@@ -69,11 +39,8 @@ Below are the primary elements of a configuration file:
69
  - `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
70
  - `dtype`: Specifies the data type used for the merging operation.
71
  - `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
72
-
73
  ## Merge Methods
74
-
75
  A quick overview of the currently supported merge methods:
76
-
77
  | Method | `merge_method` value | Multi-Model | Uses base model |
78
  | -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
79
  | Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
@@ -84,13 +51,9 @@ A quick overview of the currently supported merge methods:
84
  | [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
85
  | Passthrough | `passthrough` | ❌ | ❌ |
86
  | [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
87
-
88
-
89
  ## Citation
90
-
91
  This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
92
  If you use it in your research, please cite the following paper:
93
-
94
  ```
95
  @article{goddard2024arcee,
96
  title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
@@ -99,19 +62,13 @@ If you use it in your research, please cite the following paper:
99
  year={2024}
100
  }
101
  ```
102
-
103
  This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
104
  """
105
 
106
  examples = [[str(f)] for f in pathlib.Path("examples").glob("*.yaml")]
107
-
108
- # Do not set community token as `HF_TOKEN` to avoid accidentally using it in merge scripts.
109
- # `COMMUNITY_HF_TOKEN` is used to upload models to the community organization (https://huggingface.co/mergekit-community)
110
- # when user do not provide a token.
111
  COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")
112
 
113
-
114
- def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]]:
115
  runner = LogsViewRunner()
116
 
117
  if not yaml_config:
@@ -151,8 +108,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
151
 
152
  if not repo_name:
153
  yield runner.log("No repo name provided. Generating a random one.")
154
- repo_name = f"mergekit-{merge_config.merge_method}"
155
- # Make repo_name "unique" (no need to be extra careful on uniqueness)
156
  repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
157
  repo_name = repo_name.replace("/", "-").strip("-")
158
 
@@ -167,8 +123,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
167
  yield runner.log(f"Error creating repo {e}", level="ERROR")
168
  return
169
 
170
- # Set tmp HF_HOME to avoid filling up disk Space
171
- tmp_env = os.environ.copy() # taken from https://stackoverflow.com/a/4453495
172
  tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
173
  full_cli = cli + f" --lora-merge-cache {tmpdirname}/.lora_cache"
174
  yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)
@@ -186,7 +141,6 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
186
  )
187
  yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
188
 
189
-
190
  with gr.Blocks() as demo:
191
  gr.Markdown(MARKDOWN_DESCRIPTION)
192
 
@@ -206,6 +160,11 @@ with gr.Blocks() as demo:
206
  label="Repo name",
207
  placeholder="Optional. Will create a random name if empty.",
208
  )
 
 
 
 
 
209
  button = gr.Button("Merge", variant="primary")
210
  logs = LogsView(label="Terminal output")
211
  gr.Examples(
@@ -218,11 +177,8 @@ with gr.Blocks() as demo:
218
  )
219
  gr.Markdown(MARKDOWN_ARTICLE)
220
 
221
- button.click(fn=merge, inputs=[config, token, repo_name], outputs=[logs])
222
-
223
 
224
- # Run garbage collection every hour to keep the community org clean.
225
- # Empty models might exists if the merge fails abruptly (e.g. if user leaves the Space).
226
  def _garbage_collect_every_hour():
227
  while True:
228
  try:
@@ -231,8 +187,9 @@ def _garbage_collect_every_hour():
231
  print("Error running garbage collection", e)
232
  time.sleep(3600)
233
 
234
-
235
  pool = ThreadPoolExecutor()
236
- pool.submit(_garbage_collect_every_hour)
 
 
237
 
238
- demo.queue(default_concurrency_limit=1).launch()
 
13
  import yaml
14
  from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
15
  from mergekit.config import MergeConfiguration
 
16
  from clean_community_org import garbage_collect_empty_models
17
 
18
  has_gpu = torch.cuda.is_available()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
21
  " --cuda --low-cpu-memory --allow-crimes" if has_gpu else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
22
  )
23
 
24
  MARKDOWN_DESCRIPTION = """
25
  # mergekit-gui
 
26
  The fastest way to perform a model merge 🔥
 
27
  Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
28
  """
29
 
30
  MARKDOWN_ARTICLE = """
31
  ___
 
32
  ## Merge Configuration
 
33
  [Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
34
  Below are the primary elements of a configuration file:
 
35
  - `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
36
  - `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
37
  - `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
 
39
  - `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
40
  - `dtype`: Specifies the data type used for the merging operation.
41
  - `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
 
42
  ## Merge Methods
 
43
  A quick overview of the currently supported merge methods:
 
44
  | Method | `merge_method` value | Multi-Model | Uses base model |
45
  | -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
46
  | Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
 
51
  | [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
52
  | Passthrough | `passthrough` | ❌ | ❌ |
53
  | [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
 
 
54
  ## Citation
 
55
  This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
56
  If you use it in your research, please cite the following paper:
 
57
  ```
58
  @article{goddard2024arcee,
59
  title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
 
62
  year={2024}
63
  }
64
  ```
 
65
  This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
66
  """
67
 
68
  examples = [[str(f)] for f in pathlib.Path("examples").glob("*.yaml")]
 
 
 
 
69
  COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")
70
 
71
+ def merge(yaml_config: str, hf_token: str, repo_name: str, profile_name: str) -> Iterable[List[Log]]:
 
72
  runner = LogsViewRunner()
73
 
74
  if not yaml_config:
 
108
 
109
  if not repo_name:
110
  yield runner.log("No repo name provided. Generating a random one.")
111
+ repo_name = f"{profile_name}/mergekit-{merge_config.merge_method}" if profile_name else f"mergekit-{merge_config.merge_method}"
 
112
  repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
113
  repo_name = repo_name.replace("/", "-").strip("-")
114
 
 
123
  yield runner.log(f"Error creating repo {e}", level="ERROR")
124
  return
125
 
126
+ tmp_env = os.environ.copy()
 
127
  tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
128
  full_cli = cli + f" --lora-merge-cache {tmpdirname}/.lora_cache"
129
  yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)
 
141
  )
142
  yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
143
 
 
144
  with gr.Blocks() as demo:
145
  gr.Markdown(MARKDOWN_DESCRIPTION)
146
 
 
160
  label="Repo name",
161
  placeholder="Optional. Will create a random name if empty.",
162
  )
163
+ profile_name = gr.Textbox(
164
+ lines=1,
165
+ label="Hugging Face Profile Name",
166
+ placeholder="Enter your Hugging Face profile name.",
167
+ )
168
  button = gr.Button("Merge", variant="primary")
169
  logs = LogsView(label="Terminal output")
170
  gr.Examples(
 
177
  )
178
  gr.Markdown(MARKDOWN_ARTICLE)
179
 
180
+ button.click(fn=merge, inputs=[config, token, repo_name, profile_name], outputs=[logs])
 
181
 
 
 
182
  def _garbage_collect_every_hour():
183
  while True:
184
  try:
 
187
  print("Error running garbage collection", e)
188
  time.sleep(3600)
189
 
 
190
  pool = ThreadPoolExecutor()
191
+ pool.submit(_gar
192
+
193
+ bage_collect_every_hour)
194
 
195
+ demo.queue(default_concurrency_limit=1).launch()