Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 from dotenv import load_dotenv
-from datasets import load_dataset
+from datasets import load_dataset, concatenate_datasets
 from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
 from huggingface_hub import login
 
@@ -14,10 +14,12 @@ login(HF_TOKEN)
 # === Settings ===
 BASE_MODEL = "Sakalti/template-4"
 HF_REPO = "Sakalti/template-16"
 
 # === Data loading ===
-
-
+dataset1 = load_dataset("Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", split="train")
+dataset2 = load_dataset("HachiML/alpaca_jp_python", split="train")
+dataset3 = load_dataset("HachiML/alpaca_jp_math", split="train")
+dataset = concatenate_datasets([dataset1, dataset2, dataset3])
 # === Tokenizer & model preparation ===
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
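Note that `concatenate_datasets` expects a single list of `Dataset` objects that all share an identical feature schema. A parallel corpus and two alpaca-style instruction datasets almost certainly expose different columns, so the concatenation above will likely fail until each source is mapped to a common column. Below is a minimal sketch of that normalization step; the per-dataset column names (`english`, `japanese`, `instruction`, `input`, `output`) are assumptions and should be checked against each dataset card on the Hub.

```python
from datasets import load_dataset, concatenate_datasets

def to_text(example, fields):
    # Join whichever of the assumed fields exist into a single "text" string.
    parts = [str(example[f]) for f in fields if f in example and example[f]]
    return {"text": "\n".join(parts)}

dataset1 = load_dataset("Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", split="train")
dataset2 = load_dataset("HachiML/alpaca_jp_python", split="train")
dataset3 = load_dataset("HachiML/alpaca_jp_math", split="train")

normalized = []
for ds, fields in [
    (dataset1, ["english", "japanese"]),             # assumed column names
    (dataset2, ["instruction", "input", "output"]),  # assumed alpaca-style columns
    (dataset3, ["instruction", "input", "output"]),  # assumed alpaca-style columns
]:
    # Dropping the original columns leaves every dataset with only "text",
    # giving them the identical schema that concatenate_datasets requires.
    normalized.append(
        ds.map(to_text, fn_kwargs={"fields": fields}, remove_columns=ds.column_names)
    )

dataset = concatenate_datasets(normalized)
```

Collapsing everything to one `text` column also matches what a causal-LM `Trainer` pipeline typically tokenizes downstream; if the prompt format matters, the join in `to_text` can be replaced with an explicit template.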
|