Shriti09 committed
Commit e5e9af1 · verified · Parent: 69bbe3d

Update app.py

Files changed (1): app.py (+11, -15)
app.py CHANGED
@@ -1,19 +1,12 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-import gradio as gr
-
-# Use GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Base model and adapter paths
-base_model_name = "microsoft/phi-2" # Pull from HF Hub directly
-
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
 import os
+from accelerate import Accelerator
+
+# Initialize the Accelerator to manage device placement and offloading
+accelerator = Accelerator()
 
 # Use GPU if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -23,19 +16,22 @@ base_model_name = "microsoft/phi-2" # Pull from HF Hub directly
 adapter_path = "Shriti09/Microsoft-Phi-QLora" # Update with your Hugging Face repo path
 
 # Create an offload directory to store the model parts
-offload_dir = "./offload" # Replace with your desired path
+offload_dir = "./offload_dir" # Set offload directory here
 
 # Ensure the offload directory exists
 os.makedirs(offload_dir, exist_ok=True)
 
 print("🔧 Loading base model...")
+# Using the Accelerator to load the model and dispatch to the correct devices
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    device_map="auto",
-    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-    offload_dir=offload_dir # Set offload directory here
+    device_map="auto", # Automatically map the model to available devices
+    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
+)
 
+# Ensure the model is offloaded when necessary
+base_model = accelerator.prepare(base_model)
+
 print("🔧 Loading LoRA adapter...")
 adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
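
For reference, Transformers supports disk offload during loading directly through the documented offload_folder argument of from_pretrained, so no separate offload keyword is needed when device_map="auto" already dispatches layers across the available devices. Below is a minimal, self-contained sketch of that pattern, ending in an illustrative generation call; the prompt and max_new_tokens value are hypothetical and not taken from app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "microsoft/phi-2"
adapter_path = "Shriti09/Microsoft-Phi-QLora"

# offload_folder is the documented from_pretrained argument for spilling
# weights that do not fit in GPU/CPU memory onto disk; device_map="auto"
# dispatches the layers across available devices at load time.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    offload_folder="./offload_dir",
)
model = PeftModel.from_pretrained(base_model, adapter_path)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Illustrative smoke test (prompt and generation settings are made up)
inputs = tokenizer("What is QLoRA?", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))

One design note on the committed code: because device_map="auto" already performs device placement when the weights are loaded, the extra accelerator.prepare(base_model) call is generally only needed for training loops; for pure inference the load-time dispatch is sufficient.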