DataPilot
/

ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2

@@ -24,182 +24,90 @@ Gemma 3ファミリーと同様に、テキスト入力と画像入力の両方
 pip install -U transformers accelerate Pillow requests torch
 ```
-また、vLLMを使用する場合は、vLLMをインストールしてください。
-```bash
-pip install vllm
-```
-### vLLMを使用した推論
-vLLMを使用することで、高速な推論が可能です。
 ```python
-from vllm import LLM, SamplingParams
-# モデルID
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2"
-# サンプリングパラメータ (必要に応じて調整)
-sampling_params = SamplingParams(temperature=0.1, top_p=0.9, max_tokens=512)
-# LLMインスタンスの作成
-llm = LLM(model=model_id, trust_remote_code=True) # Gemma 3にはリモートコード実行が必要な場合があります
-# プロンプトの準備 (Gemma 3のチャットテンプレート形式を推奨)
-# vLLMは通常、tokenizerからチャットテンプレートを自動適用します
-# 手動で適用する場合は tokenizer.apply_chat_template を使用します
 messages = [
-    {"role": "system", "content": "あなたは親切なAIアシスタントです。"},
-    {"role": "user", "content": "日本の首都はどこですか？その都市の有名な観光地を3つ教えてください。"}
 ]
-# Hugging Face tokenizerを使ってチャットテンプレートを適用
-from transformers import AutoTokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-# 推論の実行
-outputs = llm.generate(prompt, sampling_params)
-# 結果の表示
-for output in outputs:
-    prompt_disp = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt_disp!r}")
-    print(f"Generated text: {generated_text!r}")
 ```
-### Transformersを使用した推論 (テキストのみ)
-テキスト入力のみで推論を行う場合の基本的なコードです。System PromptとUser Promptを使用します。
 ```python
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# モデルID
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2"
-# トークナイザーとモデルのロード
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# Gemma 3 4B はメモリ要求が高いため、bf16を使用し、可能であれば複数GPUに分散します
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True # Gemma 3にはリモートコード実行が必要な場合があります
-)
-model.eval()
-# チャットメッセージの準備
-messages = [
-    {"role": "system", "content": "あなたは知識豊富で、質問に対して詳細に答えるAIアシスタントです。"},
-    {"role": "user", "content": "機械学習とは何か、初心者にもわかるように簡単に説明してください。"}
-]
-# チャットテンプレートを適用し、テンソルに変換
-inputs = tokenizer.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_tensors="pt"
-).to(model.device)
-# 入力トークン数の取得 (生成部分のみを後で抽出するため)
-input_len = inputs.shape[-1]
-# 推論の実行
-with torch.inference_mode():
-    outputs = model.generate(
-        inputs,
-        max_new_tokens=512, # 最大生成トークン数
-        do_sample=True,     # サンプリングを使用する場合
-        temperature=0.7,    # 生成の多様性
-        top_p=0.9           # Top-pサンプリング
-    )
-# 生成されたトークンのみをデコード
-generated_tokens = outputs[0][input_len:]
-response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-print("--- モデルの応答 ---")
-print(response)
-```
-### Transformersを使用した推論 (画像 + テキスト)
-画像とテキストを組み合わせて入力し、推論を行う場合のコードです。
-```python
-import torch
-from transformers import AutoProcessor, Gemma3ForConditionalGeneration # または AutoModelForCausalLM
-from PIL import Image
-import requests
-# モデルID
-model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2"
-# プロセッサーとモデルのロード
 processor = AutoProcessor.from_pretrained(model_id)
-# Gemma 3 4B はメモリ要求が高いため、bf16を使用し、可能であれば複数GPUに分散します
-model = Gemma3ForConditionalGeneration.from_pretrained( # Gemma 3の推奨クラス
-    model_id,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True # Gemma 3にはリモートコード実行が必要な場合があります
-)
-model.eval()
-# 画像の準備 (例: URLからロード)
-image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"
-image = Image.open(requests.get(image_url, stream=True).raw)
-# チャットメッセージの準備 (画像とテキストを含む)
 messages = [
     {
         "role": "system",
-        "content": [{"type": "text", "text": "あなたは画像を詳細に説明するAIアシスタントです。"}]
     },
     {
         "role": "user",
         "content": [
-            {"type": "image", "image": image}, # PILイメージオブジェクトを渡す
-            {"type": "text", "text": "この画像に写っている昆虫は何ですか？花についても説明してください。"}
         ]
     }
 ]
-# チャットテンプレートを適用し、テンソルに変換
-# apply_chat_templateは画像も処理できます
 inputs = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True, # 画像処理のために辞書形式で返すのが確実
-    return_tensors="pt"
-).to(model.device)
-# 入力トークン数の取得 (生成部分のみを後で抽出するため)
-# inputsが辞書の場合、'input_ids'キーを使用
-input_len = inputs['input_ids'].shape[-1]
-# 推論の実行
 with torch.inference_mode():
-    outputs = model.generate(
-        **inputs, # 辞書を展開して渡す
-        max_new_tokens=512, # 最大生成トークン数
-        do_sample=False     # 画像説明などではFalseの方が安定することがあります
-    )
-# 生成されたトークンのみをデコード
-# outputsはテンソルで返ってくる
-generated_tokens = outputs[0][input_len:]
-response = processor.decode(generated_tokens, skip_special_tokens=True)
-print("--- モデルの応答 ---")
-print(response)
 ```
 **注意点:**

 pip install -U transformers accelerate Pillow requests torch
 ```
+### 画像付き推論 (画像 + テキスト)
 ```python
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+from PIL import Image
+import requests
+import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
+processor = AutoProcessor.from_pretrained(model_id)
 messages = [
+    {
+        "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
+    },
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "https://cs.stanford.edu/people/rak248/VG_100K_2/2399540.jpg"},
+            {"type": "text", "text": "この画像を説明してください。"}
+        ]
+    }
 ]
+inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
+with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
 ```
+### 画像無し推論 (テキストのみ)
 ```python
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-instruct-v0.2"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
 processor = AutoProcessor.from_pretrained(model_id)
 messages = [
     {
         "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
     },
     {
         "role": "user",
         "content": [
+            {"type": "text", "text": "AI言語モデルであるLaMDAが意識があることを主張して弁護士を呼んだとのことです。LaMDAには意識があると思いますか？"}
         ]
     }
 ]
 inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
 with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
 ```
 **注意点:**