Feat: Added Gradio support (#812)

* Added gradio support
* queuing and title
* pre-commit run

Files changed:
- README.md +8 -0
- requirements.txt +1 -0
- src/axolotl/cli/__init__.py +88 -1
- src/axolotl/cli/inference.py +11 -3
README.md
CHANGED

@@ -97,6 +97,10 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
 # inference
 accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
     --lora_model_dir="./lora-out"
+
+# gradio
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./lora-out" --gradio
 ```

 ## Installation

@@ -919,6 +923,10 @@ Pass the appropriate flag to the train command:
 cat /tmp/prompt.txt | python -m axolotl.cli.inference examples/your_config.yml \
     --base_model="./completed-model" --prompter=None --load_in_8bit=True
 ```
+-- With gradio hosting
+```bash
+python -m axolotl.cli.inference examples/your_config.yml --gradio
+```

 Please use `--sample_packing False` if you have it on and receive the error similar to below:
requirements.txt
CHANGED

@@ -31,3 +31,4 @@ scikit-learn==1.2.2
 pynvml
 art
 fschat==0.2.29
+gradio
src/axolotl/cli/__init__.py
CHANGED

@@ -6,8 +6,10 @@ import os
 import random
 import sys
 from pathlib import Path
+from threading import Thread
 from typing import Any, Dict, List, Optional, Union

+import gradio as gr
 import torch
 import yaml

@@ -16,7 +18,7 @@ from accelerate.commands.config import config_args
 from art import text2art
 from huggingface_hub import HfApi
 from huggingface_hub.utils import LocalTokenNotFoundError
-from transformers import GenerationConfig, TextStreamer
+from transformers import GenerationConfig, TextIteratorStreamer, TextStreamer

 from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer
 from axolotl.logging_config import configure_logging

@@ -153,6 +155,91 @@ def do_inference(
     print(tokenizer.decode(generated["sequences"].cpu().tolist()[0]))


+def do_inference_gradio(
+    *,
+    cfg: DictDefault,
+    cli_args: TrainerCliArgs,
+):
+    model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
+    prompter = cli_args.prompter
+    default_tokens = {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
+
+    for token, symbol in default_tokens.items():
+        # If the token isn't already specified in the config, add it
+        if not (cfg.special_tokens and token in cfg.special_tokens):
+            tokenizer.add_special_tokens({token: symbol})
+
+    prompter_module = None
+    if prompter:
+        prompter_module = getattr(
+            importlib.import_module("axolotl.prompters"), prompter
+        )
+
+    if cfg.landmark_attention:
+        from axolotl.monkeypatch.llama_landmark_attn import set_model_mem_id
+
+        set_model_mem_id(model, tokenizer)
+        model.set_mem_cache_args(
+            max_seq_len=255, mem_freq=50, top_k=5, max_cache_size=None
+        )
+
+    model = model.to(cfg.device)
+
+    def generate(instruction):
+        if not instruction:
+            return
+        if prompter_module:
+            # pylint: disable=stop-iteration-return
+            prompt: str = next(
+                prompter_module().build_prompt(instruction=instruction.strip("\n"))
+            )
+        else:
+            prompt = instruction.strip()
+        batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
+
+        model.eval()
+        with torch.no_grad():
+            generation_config = GenerationConfig(
+                repetition_penalty=1.1,
+                max_new_tokens=1024,
+                temperature=0.9,
+                top_p=0.95,
+                top_k=40,
+                bos_token_id=tokenizer.bos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.pad_token_id,
+                do_sample=True,
+                use_cache=True,
+                return_dict_in_generate=True,
+                output_attentions=False,
+                output_hidden_states=False,
+                output_scores=False,
+            )
+            streamer = TextIteratorStreamer(tokenizer)
+            generation_kwargs = {
+                "inputs": batch["input_ids"].to(cfg.device),
+                "generation_config": generation_config,
+                "streamer": streamer,
+            }
+
+            thread = Thread(target=model.generate, kwargs=generation_kwargs)
+            thread.start()
+
+            all_text = ""
+
+            for new_text in streamer:
+                all_text += new_text
+                yield all_text
+
+    demo = gr.Interface(
+        fn=generate,
+        inputs="textbox",
+        outputs="text",
+        title=cfg.get("gradio_title", "Axolotl Gradio Interface"),
+    )
+    demo.queue().launch(show_api=False, share=True)
+
+
 def choose_config(path: Path):
     yaml_files = list(path.glob("*.yml"))
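The core of the new `do_inference_gradio` is the background-thread streaming pattern: `model.generate` blocks until generation finishes, so it runs in a worker `Thread` while the Gradio callback iterates the `TextIteratorStreamer` and yields the accumulated text. Because `generate` is a Python generator, `gr.Interface` re-renders the output on each yield, and Gradio needs queuing enabled to stream generator output, hence `demo.queue()` before `launch`. A minimal standalone sketch of the same pattern, with a placeholder model in place of the axolotl-loaded one:

```python
# Minimal sketch of the Thread + TextIteratorStreamer pattern from the diff
# above; "sshleifer/tiny-gpt2" is only a placeholder model for illustration.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")


def stream_generate(prompt: str):
    batch = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True omits the echoed prompt from the streamed output
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

    # generate() blocks until done, so it runs in a worker thread while this
    # thread consumes decoded text chunks from the streamer as they arrive
    thread = Thread(
        target=model.generate,
        kwargs={
            "inputs": batch["input_ids"],
            "max_new_tokens": 32,
            "streamer": streamer,
        },
    )
    thread.start()

    all_text = ""
    for new_text in streamer:  # blocks until the next decoded chunk is ready
        all_text += new_text
        yield all_text  # each yield becomes one streaming UI update in Gradio


for partial in stream_generate("Hello"):
    print(partial)
```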
src/axolotl/cli/inference.py
CHANGED

@@ -6,11 +6,16 @@ from pathlib import Path
 import fire
 import transformers

-from axolotl.cli import do_inference, load_cfg, print_axolotl_text_art
+from axolotl.cli import (
+    do_inference,
+    do_inference_gradio,
+    load_cfg,
+    print_axolotl_text_art,
+)
 from axolotl.common.cli import TrainerCliArgs


-def do_cli(config: Path = Path("examples/"), **kwargs):
+def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs):
     # pylint: disable=duplicate-code
     print_axolotl_text_art()
     parsed_cfg = load_cfg(config, **kwargs)

@@ -21,7 +26,10 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
     )
     parsed_cli_args.inference = True

-    do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args)
+    if gradio:
+        do_inference_gradio(cfg=parsed_cfg, cli_args=parsed_cli_args)
+    else:
+        do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args)


 if __name__ == "__main__":
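Since the entry point is driven by fire (presumably `fire.Fire(do_cli)` under the `__main__` guard, which the diff does not show), the new `gradio=False` keyword argument surfaces as a boolean CLI flag with no extra argument parsing. A hedged usage sketch, with a config path as in the repo's examples:

```bash
# terminal streaming (default path, do_inference)
python -m axolotl.cli.inference examples/your_config.yml

# hosted Gradio UI (do_inference_gradio); fire maps gradio=False to --gradio
python -m axolotl.cli.inference examples/your_config.yml --gradio
```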