Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -13,55 +13,25 @@ import torch
 import yaml
 from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
 from mergekit.config import MergeConfiguration
-
 from clean_community_org import garbage_collect_empty_models

 has_gpu = torch.cuda.is_available()

-# Running directly from Python doesn't work well with Gradio+run_process because of:
-# Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
-# Let's use the CLI instead.
-#
-# import mergekit.merge
-# from mergekit.common import parse_kmb
-# from mergekit.options import MergeOptions
-#
-# merge_options = (
-#     MergeOptions(
-#         copy_tokenizer=True,
-#         cuda=True,
-#         low_cpu_memory=True,
-#         write_model_card=True,
-#     )
-#     if has_gpu
-#     else MergeOptions(
-#         allow_crimes=True,
-#         out_shard_size=parse_kmb("1B"),
-#         lazy_unpickle=True,
-#         write_model_card=True,
-#     )
-# )
-
 cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
     " --cuda --low-cpu-memory --allow-crimes" if has_gpu else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
 )

 MARKDOWN_DESCRIPTION = """
 # mergekit-gui
-
 The fastest way to perform a model merge 🔥
-
 Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
 """

 MARKDOWN_ARTICLE = """
 ___
-
 ## Merge Configuration
-
 [Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
 Below are the primary elements of a configuration file:
-
 - `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
 - `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
 - `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
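For context on the `cli` string kept by this hunk: the GPU branch adds `--cuda --low-cpu-memory --allow-crimes`, while the CPU branch falls back to `--allow-crimes --out-shard-size 1B --lazy-unpickle`. Below is a minimal standalone sketch of that flag selection run through `subprocess`; the app itself streams the same command through `LogsViewRunner.run_command`, and the sketch assumes mergekit is installed and a `config.yaml` exists in the working directory.

```python
# Standalone sketch of the CLI assembly shown in the hunk above.
# Assumes mergekit is installed and config.yaml is present; the app streams
# this command through LogsViewRunner.run_command rather than subprocess.
import subprocess

import torch

has_gpu = torch.cuda.is_available()

# Same flag selection as in the diff: CUDA path on GPU, low-memory path on CPU.
cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
    " --cuda --low-cpu-memory --allow-crimes"
    if has_gpu
    else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
)

if __name__ == "__main__":
    subprocess.run(cli.split(), check=True)
```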
@@ -69,11 +39,8 @@ Below are the primary elements of a configuration file:
 - `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
 - `dtype`: Specifies the data type used for the merging operation.
 - `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
-
 ## Merge Methods
-
 A quick overview of the currently supported merge methods:
-
 | Method | `merge_method` value | Multi-Model | Uses base model |
 | -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
 | Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
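The configuration elements listed in the article (`merge_method`, `models`/`slices`, `parameters`, `dtype`, `tokenizer_source`) correspond to top-level YAML keys. As a hedged illustration only, the sketch below parses a hypothetical linear-merge config with `yaml.safe_load`; the model names are placeholders, and the Space's real samples live under `examples/*.yaml`.

```python
# Hypothetical merge configuration illustrating the elements listed above.
# Model names are placeholders; real examples live in examples/*.yaml.
import yaml

EXAMPLE_CONFIG = """
merge_method: linear          # see the Merge Methods table
models:                       # mutually exclusive with `slices`
  - model: org-a/model-a      # placeholder repo id
    parameters:
      weight: 0.5
  - model: org-b/model-b      # placeholder repo id
    parameters:
      weight: 0.5
dtype: float16
"""

config = yaml.safe_load(EXAMPLE_CONFIG)
print(config["merge_method"])                    # -> "linear"
print([m["model"] for m in config["models"]])    # -> placeholder repo ids
```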
@@ -84,13 +51,9 @@ A quick overview of the currently supported merge methods:
 | [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
 | Passthrough | `passthrough` | ❌ | ❌ |
 | [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
-
-
 ## Citation
-
 This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
 If you use it in your research, please cite the following paper:
-
 ```
 @article{goddard2024arcee,
   title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
@@ -99,19 +62,13 @@ If you use it in your research, please cite the following paper:
   year={2024}
 }
 ```
-
 This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
 """

 examples = [[str(f)] for f in pathlib.Path("examples").glob("*.yaml")]
-
-# Do not set community token as `HF_TOKEN` to avoid accidentally using it in merge scripts.
-# `COMMUNITY_HF_TOKEN` is used to upload models to the community organization (https://huggingface.co/mergekit-community)
-# when user do not provide a token.
 COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")

-
-def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]]:
+def merge(yaml_config: str, hf_token: str, repo_name: str, profile_name: str) -> Iterable[List[Log]]:
     runner = LogsViewRunner()

     if not yaml_config:
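This hunk drops the explanatory comments around `COMMUNITY_HF_TOKEN`, but the behaviour they describe (uploading to the mergekit-community organization when the user supplies no token) still applies. The sketch below is only a hedged illustration of that kind of fallback; the helper name and return shape are invented here, and the actual branch of `merge()` that does this sits outside the diff.

```python
# Hedged sketch of the token fallback described by the removed comments:
# use the caller's token when given, otherwise fall back to the community
# token so the merge is uploaded to the community org. Illustrative only;
# pick_token is not a function from app.py.
import os

COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")


def pick_token(hf_token: str | None) -> tuple[str | None, bool]:
    """Return (token, is_community_upload)."""
    if hf_token:
        return hf_token, False
    return COMMUNITY_HF_TOKEN, True


token, is_community = pick_token(hf_token=None)
print("uploading to mergekit-community" if is_community else "uploading to user profile")
```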
@@ -151,8 +108,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]

     if not repo_name:
         yield runner.log("No repo name provided. Generating a random one.")
-        repo_name = f"mergekit-{merge_config.merge_method}"
-        # Make repo_name "unique" (no need to be extra careful on uniqueness)
+        repo_name = f"{profile_name}/mergekit-{merge_config.merge_method}" if profile_name else f"mergekit-{merge_config.merge_method}"
         repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
         repo_name = repo_name.replace("/", "-").strip("-")

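The new line prefixes the generated name with the profile name when one is provided, then the existing lines append a random suffix and normalize the string. A small sketch of that logic, with `merge_method` hardcoded purely for illustration (in the app it comes from the parsed `MergeConfiguration`):

```python
# Sketch of the updated repo-name generation from the hunk above.
import random
import string


def build_repo_name(profile_name: str, merge_method: str = "slerp") -> str:
    repo_name = (
        f"{profile_name}/mergekit-{merge_method}" if profile_name else f"mergekit-{merge_method}"
    )
    # Random suffix, as in the existing line of app.py.
    repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
    # Mirrors the normalization that follows in the diff; note it also turns
    # the "/" namespace separator into "-".
    return repo_name.replace("/", "-").strip("-")


print(build_repo_name("alice"))  # e.g. "alice-mergekit-slerp-abcdefg"
print(build_repo_name(""))       # e.g. "mergekit-slerp-abcdefg"
```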
@@ -167,8 +123,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
             yield runner.log(f"Error creating repo {e}", level="ERROR")
             return

-
-        tmp_env = os.environ.copy()  # taken from https://stackoverflow.com/a/4453495
+        tmp_env = os.environ.copy()
         tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
         full_cli = cli + f" --lora-merge-cache {tmpdirname}/.lora_cache"
         yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)
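The merge runs with a copied environment whose `HF_HOME` points inside the run's temporary directory, so model downloads and caches stay local to that run. A minimal sketch of the same isolation pattern, using `subprocess` with a placeholder command instead of the app's `LogsViewRunner.run_command` call:

```python
# Minimal sketch of the cache isolation shown above: copy the environment,
# point HF_HOME at a per-run temporary directory, and hand it to the child
# process so Hugging Face downloads do not touch the global cache.
import os
import subprocess
import tempfile

with tempfile.TemporaryDirectory() as tmpdirname:
    tmp_env = os.environ.copy()
    tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
    # Placeholder command; the app runs the mergekit-yaml CLI here.
    subprocess.run(
        ["python", "-c", "import os; print(os.environ['HF_HOME'])"],
        env=tmp_env,
        check=True,
    )
```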
@@ -186,7 +141,6 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
         )
         yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")

-
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN_DESCRIPTION)

@@ -206,6 +160,11 @@ with gr.Blocks() as demo:
                 label="Repo name",
                 placeholder="Optional. Will create a random name if empty.",
             )
+            profile_name = gr.Textbox(
+                lines=1,
+                label="Hugging Face Profile Name",
+                placeholder="Enter your Hugging Face profile name.",
+            )
     button = gr.Button("Merge", variant="primary")
     logs = LogsView(label="Terminal output")
     gr.Examples(
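The new `profile_name` Textbox becomes a fourth entry in the click handler's `inputs`, so Gradio forwards its value as the fourth positional argument of `merge()`. A self-contained sketch of that wiring with a stub handler (not the app's full UI; component labels here are illustrative):

```python
# Minimal sketch of how an extra Textbox reaches the handler: each component
# in `inputs` becomes one positional argument of `fn`, in order.
import gradio as gr


def fake_merge(yaml_config, hf_token, repo_name, profile_name):
    # Stub standing in for the app's merge() generator.
    return f"would merge for profile={profile_name!r}, repo={repo_name!r}"


with gr.Blocks() as sketch:
    config = gr.Textbox(label="config.yaml")
    token = gr.Textbox(label="HF token", type="password")
    repo_name = gr.Textbox(label="Repo name")
    profile_name = gr.Textbox(label="Hugging Face Profile Name")
    out = gr.Textbox(label="Result")
    btn = gr.Button("Merge")
    btn.click(fn=fake_merge, inputs=[config, token, repo_name, profile_name], outputs=[out])

if __name__ == "__main__":
    sketch.launch()
```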
@@ -218,11 +177,8 @@ with gr.Blocks() as demo:
     )
     gr.Markdown(MARKDOWN_ARTICLE)

-    button.click(fn=merge, inputs=[config, token, repo_name], outputs=[logs])
-
+    button.click(fn=merge, inputs=[config, token, repo_name, profile_name], outputs=[logs])

-# Run garbage collection every hour to keep the community org clean.
-# Empty models might exists if the merge fails abruptly (e.g. if user leaves the Space).
 def _garbage_collect_every_hour():
     while True:
         try:
@@ -231,8 +187,9 @@ def _garbage_collect_every_hour():
             print("Error running garbage collection", e)
         time.sleep(3600)

-
 pool = ThreadPoolExecutor()
-pool.submit(_garbage_collect_every_hour)
+pool.submit(_gar
+
+bage_collect_every_hour)

-demo.queue(default_concurrency_limit=1).launch()
+demo.queue(default_concurrency_limit=1).launch()
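This closing hunk keeps the pattern of submitting an hourly garbage-collection loop to a `ThreadPoolExecutor` before the queued Gradio app launches. Below is a standalone sketch of that background-loop pattern, with a stub standing in for `garbage_collect_empty_models` and the callable passed as a single reference:

```python
# Standalone sketch of the background pattern above: submit an hourly loop to
# a ThreadPoolExecutor so it runs alongside the (blocking) app launch.
import time
from concurrent.futures import ThreadPoolExecutor


def _stub_garbage_collect() -> None:
    # Stand-in for garbage_collect_empty_models(token=COMMUNITY_HF_TOKEN).
    print("garbage collection pass")


def _garbage_collect_every_hour() -> None:
    while True:
        try:
            _stub_garbage_collect()
        except Exception as e:  # keep the loop alive on errors, as in app.py
            print("Error running garbage collection", e)
        time.sleep(3600)


pool = ThreadPoolExecutor()
pool.submit(_garbage_collect_every_hour)

# In app.py this is followed by: demo.queue(default_concurrency_limit=1).launch()
```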