DataPilot
/

ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1

@@ -13,227 +13,97 @@ license: gemma
 ## How to use
-**注意:** 以下のコードを実行する前に、必要なライブラリをインストールしてください。特に `transformers` ライブラリは Gemma 3 をサポートするバージョン (4.50.0 以降) が必要です。また、Unsloth を使用してファインチューニングされたモデルの場合、推論時にも Unsloth が必要になる場合があります。
 ```sh
 pip install -U transformers accelerate torch
-# vLLM を使用する場合
-pip install vllm
-# Unsloth が推論に必要となる場合
-pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" # 環境に合わせて調整
 ```
-### vLLM での推論 (テキスト生成)
-vLLM を使用すると、高速なテキスト生成推論が可能です。（2025年3月現在、vLLMのGemma 3マルチモーダル対応は進行中の可能性があります。最新情報はvLLMのドキュメントをご確認ください。）
 ```python
-from vllm import LLM, SamplingParams
-# モデル名を指定
-model_name = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1"
-# またはローカルパスを指定
-# model_name = "/path/to/your/model"
-# LLMインスタンスを作成
-# tensor_parallel_size は利用可能なGPU数に合わせて調整してください
-llm = LLM(model=model_name, trust_remote_code=True) # Unslothモデルの場合など必要に応じて trust_remote_code=True
-# サンプリングパラメータを設定
-sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=200)
-prompt = "<start_of_turn>user\n日本の首都はどこですか？<end_of_turn>\n<start_of_turn>model\n"
-# 推論を実行
-outputs = llm.generate(prompt, sampling_params)
-# 結果を表示
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-```
-### Transformers での推論 (テキストのみ)
-`transformers` ライブラリを使用して、テキストプロンプト（システムプロンプトとユーザープロンプトを含む）に基づいてテキストを生成します。
-```python
-from transformers import pipeline, AutoTokenizer
-import torch
-# モデル名とトークナイザーを指定
-model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# パイプラインを作成
-pipe = pipeline(
-    "text-generation", # Gemma 3 のテキスト生成には text-generation が適切
-    model=model_id,
-    tokenizer=tokenizer, # 明示的にトークナイザーを渡す
-    device="cuda", # GPUが利用可能な場合
-    torch_dtype=torch.bfloat16 # Gemma 3 推奨のデータ型
-)
-# チャット形式のメッセージを作成
 messages = [
     {
         "role": "system",
-        "content": "あなたは親切なアシスタントです。" # システムプロンプト
     },
     {
         "role": "user",
-        "content": "Unslothとは何ですか？簡単に説明してください。" # ユーザープロンプト
     }
 ]
-# チャットテンプレートを適用
-# apply_chat_template は内部で <start_of_turn>などを付与します
-prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-# 推論を実行
-# max_new_tokens は生成する最大トークン数
-# do_sample=True にすると、多様な応答が生成されやすくなります
-outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.2, top_p=0.95)
-# 生成されたテキストのみを表示 (入力プロンプト部分を除く)
-generated_text = outputs[0]['generated_text'][len(prompt):]
-print(generated_text)
-# --- AutoModelForCausalLM を使う場合 ---
-# from transformers import AutoModelForCausalLM
-# model = AutoModelForCausalLM.from_pretrained(
-#     model_id,
-#     torch_dtype=torch.bfloat16,
-#     device_map="auto", # GPUに自動で配置
-#     # Unslothモデルの場合、追加の引数が必要な場合があります
-# )
-# model.eval()
-# inputs = tokenizer.apply_chat_template(
-#     messages,
-#     add_generation_prompt=True,
-#     return_tensors="pt"
-# ).to(model.device)
-# input_len = inputs.shape[-1]
-# with torch.inference_mode():
-#     generation_output = model.generate(
-#         inputs,
-#         max_new_tokens=256,
-#         do_sample=True,
-#         temperature=0.7,
-#         top_p=0.95,
-#     )
-#     # 入力部分を除いた生成トークンを取得
-#     generated_tokens = generation_output[0][input_len:]
-#     decoded = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-#     print(decoded)
-```
-### Transformers での推論 (画像とテキスト)
-`transformers` ライブラリを使用して、画像とテキストプロンプトに基づいてテキストを生成します。
 ```python
-from transformers import pipeline, AutoProcessor
 import torch
-from PIL import Image
-import requests
-# モデル名、プロセッサーを指定
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1"
 processor = AutoProcessor.from_pretrained(model_id)
-# パイプラインを作成 (image-text-to-textタスク)
-pipe = pipeline(
-    "image-text-to-text",
-    model=model_id,
-    processor=processor, # 明示的にプロセッサーを渡す
-    device="cuda", # GPUが利用可能な場合
-    torch_dtype=torch.bfloat16 # Gemma 3 推奨のデータ型
-    # Unslothモデルの場合、追加の引数が必要な場合があります
-)
-# 画像のURL
-image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"
-# 画像を読み込む (ローカルファイルの場合は Image.open("path/to/image.jpg") )
-image = Image.open(requests.get(image_url, stream=True).raw)
-# チャット形式のメッセージを作成 (画像とテキストを含む)
 messages = [
     {
         "role": "system",
-        "content": [{"type": "text", "text": "You are a helpful assistant."}]
     },
     {
         "role": "user",
         "content": [
-            {"type": "image"}, # 画像のプレースホルダー
-            {"type": "text", "text": "この画像について詳しく説明してください。"} # テキストプロンプト
         ]
     }
 ]
-# 推論を実行 (images引数で画像を渡す)
-# max_new_tokens は生成する最大トークン数
-outputs = pipe(messages, images=image, max_new_tokens=200)
-# 生成されたテキストを表示
-# パイプラインの出力形式に合わせて調整が必要な場合があります
-# Gemma 3の場合、最後のメッセージのcontentを取り出すことが多いです
-print(outputs[0]["generated_text"][-1]["content"])
-# --- Gemma3ForConditionalGeneration を使う場合 ---
-# from transformers import Gemma3ForConditionalGeneration
-# model = Gemma3ForConditionalGeneration.from_pretrained(
-#     model_id,
-#     torch_dtype=torch.bfloat16,
-#     device_map="auto" # GPUに自動で配置
-#     # Unslothモデルの場合、追加の引数が必要な場合があります
-# ).eval()
-# # 画像を含むメッセージを作成 (Imageオブジェクトを直接渡す)
-# messages_for_processor = [
-#     {
-#         "role": "system",
-#         "content": [{"type": "text", "text": "You are a helpful assistant."}]
-#     },
-#     {
-#         "role": "user",
-#         "content": [
-#             {"type": "image", "image": image}, # PIL Image オブジェクト
-#             {"type": "text", "text": "この画像について詳しく説明してください。"}
-#         ]
-#     }
-# ]
-# # プロセッサーで入力を作成
-# inputs = processor.apply_chat_template(
-#     messages_for_processor,
-#     add_generation_prompt=True,
-#     tokenize=True, # トークン化を有効に
-#     return_dict=True,
-#     return_tensors="pt"
-# ).to(model.device) # モデルと同じデバイスに移動
-# input_len = inputs["input_ids"].shape[-1]
-# # 推論実行
-# with torch.inference_mode():
-#     generation = model.generate(**inputs, max_new_tokens=200, do_sample=False)
-#     # 入力部分を除いた生成トークンを取得
-#     generation = generation[0][input_len:]
-# # デコードして表示
-# decoded = processor.decode(generation, skip_special_tokens=True)
-# print(decoded)
-```
 ## License
 このモデルは、ベースモデルである `google/gemma-3-4b-it` のライセンス条件に従います。詳細については、以下のリンクをご参照ください。

 ## How to use
+**注意:** 以下のコードを実行する前に、必要なライブラリをインストールしてください。特に `transformers` ライブラリは Gemma 3 をサポートするバージョン (4.50.0 以降) が必要です。
 ```sh
 pip install -U transformers accelerate torch
 ```
+### 画像付き推論
 ```python
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+from PIL import Image
+import requests
+import torch
+model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
+processor = AutoProcessor.from_pretrained(model_id)
 messages = [
     {
         "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
     },
     {
         "role": "user",
+        "content": [
+            {"type": "image", "image": "https://cs.stanford.edu/people/rak248/VG_100K_2/2399540.jpg"},
+            {"type": "text", "text": "この画像を説明してください。"}
+        ]
     }
 ]
+inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
+with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
+```
+### 画像無し推論
 ```python
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.1"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
 processor = AutoProcessor.from_pretrained(model_id)
 messages = [
     {
         "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
     },
     {
         "role": "user",
         "content": [
+            {"type": "text", "text": "GPT3やGPT3.5などと比べてGPT4はどこがすごいのでしょうか？"}
         ]
     }
 ]
+inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
+with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
+```
 ## License
 このモデルは、ベースモデルである `google/gemma-3-4b-it` のライセンス条件に従います。詳細については、以下のリンクをご参照ください。