cloudyu committed on
Commit f67ecc4 · verified · 1 Parent(s): 037c396

Delete dpo.py

Files changed (1)
  1. dpo.py +0 -99
dpo.py DELETED
@@ -1,104 +0,0 @@
- import torch
- from transformers import (
-     AutoModelForCausalLM,
-     AutoTokenizer,
-     BitsAndBytesConfig,
-     TrainingArguments,
- )
- from peft import LoraConfig, prepare_model_for_kbit_training
- from datasets import load_dataset
- from trl import DPOTrainer
-
- # 4-bit NF4 quantization with double quantization and bfloat16 compute
- nf4_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_compute_dtype=torch.bfloat16,
- )
-
- # Load the model and tokenizer on the GPU
- model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     quantization_config=nf4_config,
-     device_map="auto",
-     local_files_only=False,
-     trust_remote_code=True,
- )
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=False)
- if tokenizer.pad_token is None:
-     tokenizer.pad_token = tokenizer.eos_token
- print(model)
-
- # Dataset: jondurbin/truthy-dpo-v0.1
- # Wrap each prompt in a Question/Answer template and keep the preference pair.
- def return_prompt_and_responses(samples):
-     return {
-         "prompt": [
-             "Question: " + question + "\n\nAnswer: "
-             for question in samples["prompt"]
-         ],
-         "chosen": samples["chosen"],      # response rated better
-         "rejected": samples["rejected"],  # response rated worse
-     }
-
- dataset = load_dataset(
-     "jondurbin/truthy-dpo-v0.1",
-     split="train",
-     # data_dir="data/rl"
- )
- original_columns = dataset.column_names
-
- # map() returns a new dataset; assign the result (the original script dropped it)
- dataset = dataset.map(
-     return_prompt_and_responses,
-     batched=True,
-     remove_columns=original_columns,
- )
-
- model = prepare_model_for_kbit_training(model)
-
- peft_config = LoraConfig(
-     r=128,
-     lora_alpha=16,
-     target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
-                     "up_proj", "gate_proj", "down_proj", "lm_head"],
-     lora_dropout=0.05,
-     bias="none",
-     task_type="CAUSAL_LM",
- )
-
- output_dir = "./odp"
- training_args = TrainingArguments(
-     output_dir=output_dir,
-     overwrite_output_dir=True,
-     per_device_train_batch_size=1,
-     gradient_accumulation_steps=1,
-     gradient_checkpointing=True,
-     max_grad_norm=0.3,
-     optim="adafactor",
-     num_train_epochs=1,
-     learning_rate=2e-4,
-     bf16=True,
-     save_steps=100,
-     save_total_limit=3,
-     logging_steps=10,
-     lr_scheduler_type="cosine",
-     warmup_ratio=0.05,
- )
-
- # No explicit reference model is passed: when a peft_config is given,
- # DPOTrainer uses the frozen base model as the implicit reference.
- dpo_trainer = DPOTrainer(
-     model,
-     args=training_args,
-     peft_config=peft_config,
-     beta=0.1,  # strength of the penalty keeping the policy close to the reference
-     train_dataset=dataset,
-     tokenizer=tokenizer,
-     max_prompt_length=1024,
-     max_length=2048,
- )
-
- dpo_trainer.train()
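For reference, a run of this script saved LoRA adapter checkpoints under ./odp rather than full model weights. A minimal sketch of merging such an adapter back into the base model for inference follows; the checkpoint path is an assumption (it depends on which step the run saved), and the base model is reloaded in bf16 because 4-bit quantized weights cannot be merged in place.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Meta-Llama-3-8B-Instruct"
adapter_path = "./odp/checkpoint-100"  # hypothetical: whichever checkpoint the run produced

# Reload the base model unquantized so the LoRA deltas can be folded in.
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_path)

# Fold the adapter weights into the base layers and drop the PEFT wrappers.
merged = model.merge_and_unload()
merged.save_pretrained("./odp/merged")

tokenizer = AutoTokenizer.from_pretrained(base_id)
tokenizer.save_pretrained("./odp/merged")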