Sakalti committed
Commit 6fddc37 · verified · 1 Parent(s): 1090fe1

Update app.py

Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 from dotenv import load_dotenv
-from datasets import load_dataset
+from datasets import load_dataset, concatenate_datasets
 from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
 from huggingface_hub import login
 
@@ -14,10 +14,12 @@ login(HF_TOKEN)
 # === Settings ===
 BASE_MODEL = "Sakalti/template-4"
 HF_REPO = "Sakalti/template-16"
-
+# HachiML/alpaca_jp_python
 # === Data loading ===
-dataset = load_dataset("Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", split="train")
-
+dataset1 = load_dataset("Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", split="train")
+dataset2 = load_dataset("HachiML/alpaca_jp_python", split="train")
+dataset3 = load_dataset("HachiML/alpaca_jp_math", split="train")
+dataset = concatenate_datasets([dataset1, dataset2, dataset3])
 # === Tokenizer & model preparation ===
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
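
Two fixes were applied to the data-loading hunk as committed: the stray `HachiML/alpaca_jp_python` line is commented out (as a bare expression it would raise a `NameError` at runtime), and the final call is `concatenate_datasets` (plural), which takes a single list of `Dataset` objects rather than three separate list arguments. Note also that `concatenate_datasets` requires all inputs to share the same features, and a parallel corpus plus two alpaca-style datasets almost certainly do not. Below is a minimal sketch of one way to align them before concatenating; the flatten-to-one-`text`-column step is an assumption for illustration, not the repo's actual schema handling:

from datasets import load_dataset, concatenate_datasets

def to_text(example, columns):
    # Flatten whichever columns this dataset has into one "text" field.
    return {"text": "\n".join(str(example[c]) for c in columns)}

ds1 = load_dataset("Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", split="train")
ds2 = load_dataset("HachiML/alpaca_jp_python", split="train")
ds3 = load_dataset("HachiML/alpaca_jp_math", split="train")

parts = []
for ds in (ds1, ds2, ds3):
    cols = ds.column_names
    # Reduce every dataset to the same single-column schema so that
    # concatenate_datasets accepts them.
    parts.append(ds.map(lambda ex, c=cols: to_text(ex, c), remove_columns=cols))

dataset = concatenate_datasets(parts)  # one list argument, not three

Mapping everything down to a single `text` column is the simplest way to satisfy the identical-features requirement; a real training script would more likely build per-dataset prompts before merging.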