hackergeek committed on
Commit 0b40748 · verified · 1 Parent(s): 2b35f7d

Update app.py

Files changed (1)
  1. app.py +93 -41
app.py CHANGED
@@ -1,51 +1,103 @@
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
- from peft import PeftModel
-
- class DeepSeekLoraCPUInference:
-     def __init__(self, base_model="deepseek-ai/deepseek-r1", fine_tuned_model="./deepseek_lora_finetuned"):
-         self.tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model)
-
-         # Load model in 4-bit on CPU (if no GPU is available)
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         quant_config = BitsAndBytesConfig(
-             load_in_4bit=True if device == "cuda" else False,  # Use 4-bit only if GPU is available
-             bnb_4bit_compute_dtype=torch.bfloat16,
              bnb_4bit_quant_type="nf4",
              bnb_4bit_use_double_quant=True
          )

-         self.model = AutoModelForCausalLM.from_pretrained(
-             base_model,
-             quantization_config=quant_config if device == "cuda" else None,
              device_map=device
          )

-         # Load fine-tuned LoRA model
-         self.model = PeftModel.from_pretrained(self.model, fine_tuned_model)
-         self.model.to(device)
-         self.model.eval()
-
-     def generate_text(self, prompt, max_length=200):
-         """Generates text efficiently using CPU or GPU."""
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         inputs = self.tokenizer(prompt, return_tensors="pt").to(device)
-
-         with torch.no_grad():
-             output = self.model.generate(
-                 **inputs,
-                 max_length=max_length,
-                 temperature=0.7,
-                 top_p=0.9,
-                 repetition_penalty=1.1
-             )
-
-         return self.tokenizer.decode(output[0], skip_special_tokens=True)
-
- if __name__ == "__main__":
-     model = DeepSeekLoraCPUInference()

-     prompt = "The implications of AI in the next decade are"
-     generated_text = model.generate_text(prompt)

-     print("\nGenerated Text:\n", generated_text)
  import torch
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
+ from peft import get_peft_model, LoraConfig, TaskType
+ from datasets import load_dataset
+ from transformers import BitsAndBytesConfig  # BitsAndBytesConfig is exported by transformers, not the bitsandbytes package
+
+ # ✅ Check if a GPU is available, otherwise use CPU
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # ✅ Function to start training
+ def train_model(dataset_url, model_url, epochs):
+     try:
+         # Load the tokenizer
+         tokenizer = AutoTokenizer.from_pretrained(model_url)
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token  # padding="max_length" below requires a pad token
+
+         # ✅ Load model with 4-bit quantization for CPU efficiency
+         bnb_config = BitsAndBytesConfig(
+             load_in_4bit=True if device == "cuda" else False,
              bnb_4bit_quant_type="nf4",
+             bnb_4bit_compute_dtype=torch.bfloat16,
              bnb_4bit_use_double_quant=True
          )

+         model = AutoModelForCausalLM.from_pretrained(
+             model_url,
+             quantization_config=bnb_config if device == "cuda" else None,
              device_map=device
          )

+         # ✅ Apply LoRA for efficient training
+         lora_config = LoraConfig(
+             task_type=TaskType.CAUSAL_LM,
+             r=8,
+             lora_alpha=32,
+             lora_dropout=0.1,
+             target_modules=["q_proj", "v_proj"]
+         )
+
+         model.enable_input_require_grads()  # keep embedding outputs grad-enabled so gradient checkpointing works with frozen base weights
+         model = get_peft_model(model, lora_config)
+         model.to(device)
+
+         # ✅ Load dataset
+         dataset = load_dataset(dataset_url)
+
+         # ✅ Tokenization function (labels mirror input_ids so the causal-LM loss is defined)
+         def tokenize_function(examples):
+             tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=256)
+             tokens["labels"] = tokens["input_ids"].copy()
+             return tokens
+
+         tokenized_datasets = dataset.map(tokenize_function, batched=True)
+         train_dataset = tokenized_datasets["train"]
+
+         # ✅ Training Arguments
+         training_args = TrainingArguments(
+             output_dir="./deepseek_lora_cpu",
+             evaluation_strategy="no",  # no eval_dataset is passed to the Trainer
+             learning_rate=5e-4,
+             per_device_train_batch_size=1,
+             per_device_eval_batch_size=1,
+             num_train_epochs=int(epochs),
+             save_strategy="epoch",
+             save_total_limit=2,
+             logging_dir="./logs",
+             logging_steps=10,
+             fp16=False,
+             gradient_checkpointing=True,
+             optim="adamw_torch",
+             report_to="none"
+         )
+
+         trainer = Trainer(
+             model=model,
+             args=training_args,
+             train_dataset=train_dataset
+         )
+
+         # ✅ Start Training
+         trainer.train()
+
+         # ✅ Save the Fine-Tuned Model
+         model.save_pretrained("./deepseek_lora_finetuned")
+         tokenizer.save_pretrained("./deepseek_lora_finetuned")
+
+         return "✅ Training Completed! Model saved successfully."

+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+ # ✅ Gradio UI
+ with gr.Blocks() as app:
+     gr.Markdown("# 🚀 AutoTrain DeepSeek R1 (CPU)")
+
+     dataset_url = gr.Textbox(label="Dataset URL (Hugging Face)", placeholder="e.g. imdb (any dataset with a 'text' column)")
+     model_url = gr.Textbox(label="Model URL (Hugging Face)", placeholder="e.g. deepseek-ai/deepseek-r1")
+     epochs = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Number of Training Epochs")
+
+     train_button = gr.Button("Start Training")
+     output_text = gr.Textbox(label="Training Output")
+
+     train_button.click(train_model, inputs=[dataset_url, model_url, epochs], outputs=output_text)

+ # ✅ Launch the app
+ app.launch(server_name="0.0.0.0", server_port=7860)
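
Since this commit removes the standalone inference helper, a quick way to check the result of a training run is to load the adapter that train_model saves to ./deepseek_lora_finetuned back onto the base model, the same way the removed class did. The sketch below is only an illustration, not part of the commit: it reuses the base model name and prompt from the removed code, and the full deepseek-ai/deepseek-r1 checkpoint is far too large for a CPU-only Space, so substitute a smaller causal LM when actually running it.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Names reused from the code above; treat them as placeholders.
base_model = "deepseek-ai/deepseek-r1"      # must be the same base model the adapter was trained on
adapter_dir = "./deepseek_lora_finetuned"   # directory written by train_model()

tokenizer = AutoTokenizer.from_pretrained(adapter_dir)
model = AutoModelForCausalLM.from_pretrained(base_model, device_map="cpu")
model = PeftModel.from_pretrained(model, adapter_dir)  # attach the LoRA adapter
model.eval()

inputs = tokenizer("The implications of AI in the next decade are", return_tensors="pt")
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))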