jonACE committed
Commit 11f9c5f · verified · 1 parent: 71c0be4

Update app.py

Files changed (1)
  1. app.py +35 -38
app.py CHANGED
@@ -8,6 +8,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments,
 from peft import LoraConfig, get_peft_model
 import gradio as gr
 from transformers import pipeline
+from unsloth import FastLanguageModel, is_bfloat16_supported
+import torch
+from trl import SFTTrainer
 
 
 def extract_text_from_pdf(pdf_path):
@@ -36,7 +39,13 @@ login(token=hf_token)
 #model_name = "meta-llama/Llama-2-7b-hf" # You can use a smaller one like "meta-llama/Llama-2-7b-chat-hf"
 model_name = "unsloth/llama-2-7b-chat"
 
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+#tokenizer = AutoTokenizer.from_pretrained(model_name)
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=2048
+)
+
+model = FastLanguageModel.get_peft_model(model)
 
 # Create dataset
 data = {"text": [clean_text]}
@@ -55,43 +64,31 @@ def tokenize_function(examples):
 
 tokenized_datasets = dataset.map(tokenize_function, batched=True)
 
-# Load LLaMA 2 model in 4-bit mode to save memory
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    load_in_4bit=True, # Use 4-bit quantization for efficiency
-    device_map="auto"
-    #device_map="cpu",
-    #quantization_config=None
-)
-
-# Apply LoRA (efficient fine-tuning)
-lora_config = LoraConfig(
-    r=8, # Low-rank parameter
-    lora_alpha=32,
-    target_modules=["q_proj", "v_proj"], # Applies only to attention layers
-    lora_dropout=0.05
-)
-
-model = get_peft_model(model, lora_config)
-
-training_args = TrainingArguments(
-    output_dir="./results",
-    evaluation_strategy="no", # Disable evaluation (to enable, change value to 'epoch')
-    learning_rate=2e-4,
-    per_device_train_batch_size=1, # Reduce batch size for memory efficiency
-    per_device_eval_batch_size=1,
-    num_train_epochs=3,
-    weight_decay=0.01,
-    save_strategy="epoch",
-    logging_dir="./logs",
-    logging_steps=10,
-)
-
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_datasets,
-    tokenizer=tokenizer,
+trainer = SFTTrainer(
+    model = model,
+    tokenizer = tokenizer,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    max_seq_length = max_seq_length,
+    dataset_num_proc = 2,
+    packing = False, # Can make training 5x faster for short sequences.
+    args = TrainingArguments(
+        per_device_train_batch_size = 2,
+        gradient_accumulation_steps = 4,
+        warmup_steps = 5,
+        # num_train_epochs = 1, # Set this for 1 full training run.
+        max_steps = 60,
+        learning_rate = 2e-4,
+        fp16 = not is_bfloat16_supported(),
+        bf16 = is_bfloat16_supported(),
+        logging_steps = 1,
+        optim = "adamw_8bit",
+        weight_decay = 0.01,
+        lr_scheduler_type = "linear",
+        seed = 3407,
+        output_dir = "outputs",
+        report_to = "none", # Use this for WandB etc
+    ),
 )
 
 def perform_training():
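
For context, the added code follows the usual Unsloth + TRL fine-tuning pattern (FastLanguageModel.from_pretrained → get_peft_model → SFTTrainer). The sketch below is illustrative only and is not part of the commit: the toy dataset, the explicit max_seq_length variable (which the diff passes to SFTTrainer but never defines, since 2048 is hard-coded when loading), and the final trainer.train() call are assumptions. It also assumes an older trl release in which SFTTrainer still accepts dataset_text_field and max_seq_length directly (newer releases move these into SFTConfig).

# Illustrative sketch only -- not part of the commit.
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel, is_bfloat16_supported

# Assumption: bind the sequence length once; the diff hard-codes 2048 when loading
# but references an undefined max_seq_length variable in SFTTrainer.
max_seq_length = 2048

# Load base model and tokenizer through Unsloth (replaces AutoTokenizer/AutoModelForCausalLM).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-2-7b-chat",
    max_seq_length=max_seq_length,
)

# Attach LoRA adapters with Unsloth's defaults (replaces LoraConfig + get_peft_model).
model = FastLanguageModel.get_peft_model(model)

# Toy stand-in for the PDF-derived dataset built earlier in app.py.
dataset = Dataset.from_dict({"text": ["example text extracted from a PDF"]})

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

trainer.train()  # assumption: perform_training() presumably ends up invoking this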