---
base_model: google/gemma-2-9b
tags:
- text-generation-inference
- transformers
- unsloth
- gemma2
- trl
license: cc-by-sa-4.0
language:
- en
- ja
---

# Uploaded model

- **Developed by:** Kohsaku
- **License:** CC BY-NC-ND 4.0
- **Finetuned from model:** google/gemma-2-9b

This gemma2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

## Inference code

Note: the environment variable HF_TOKEN is assumed to be set separately.

```
# Package installation (Google Colaboratory example)
!pip uninstall unsloth -y
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --upgrade torch
!pip install --upgrade xformers
!pip install ipywidgets --upgrade

# Install FlashAttention only on GPUs with compute capability >= 8 (Ampere or newer)
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"
```

```
# Sample code
import os
import json
import torch
from unsloth import FastLanguageModel

HF_TOKEN = os.environ["HF_TOKEN"]  # set separately, e.g. as an environment variable or Colab secret

model_name = "Kohsaku/gemma-2-9b-finetune-4"
max_seq_length = 1024
dtype = None          # auto-detect dtype (float16 / bfloat16)
load_in_4bit = True   # load the model with 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = HF_TOKEN,
)
FastLanguageModel.for_inference(model)

# Single-prompt generation check ("自然言語処理とは何か" = "What is natural language processing?")
text = "自然言語処理とは何か"
tokenized_input = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt").to(model.device)

with torch.no_grad():
    output = model.generate(
        tokenized_input,
        max_new_tokens = 1024,
        use_cache = True,
        do_sample = False,
        repetition_penalty = 1.2,
    )[0]
print(tokenizer.decode(output))

# Evaluation with ELYZA-tasks-100-TV
# Load ELYZA-tasks-100-TV. Upload the file in advance.
# In the omnicampus development environment, drag and drop the task jsonl into the left pane before running.
datasets = []
with open("elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

# Run the tasks with the fine-tuned model
from tqdm import tqdm

# Switch the model to inference mode
FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
    input = dt["input"]
    # Prompt format: "### 指示" = instruction, "### 回答" = answer
    prompt = f"""### 指示\n{input}\n### 回答\n"""
    inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample = False, repetition_penalty = 1.2)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": input, "output": prediction})

# Save the results as jsonl
with open(f"{model_name.split('/')[-1]}_outputs.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
```
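
If you want to sanity-check the saved predictions, the following minimal sketch (assuming the `_outputs.jsonl` file written above exists and that `model_name` is still defined in the session) reads the file back and prints the first entry.

```
# Minimal sanity check of the saved predictions (assumption: the *_outputs.jsonl
# file produced by the sample code above is in the current directory).
import json

output_path = f"{model_name.split('/')[-1]}_outputs.jsonl"
with open(output_path, "r", encoding="utf-8") as f:
    records = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(records)} predictions")
print(records[0]["task_id"], records[0]["output"][:200])
```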