Shriti09 committed
Commit 5010915 · verified · 1 Parent(s): e5e9af1

Update app.py

Files changed (1)
  1. app.py +0 -14
app.py CHANGED
@@ -3,10 +3,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
  from peft import PeftModel
  import gradio as gr
  import os
- from accelerate import Accelerator
-
- # Initialize the Accelerator to manage device placement and offloading
- accelerator = Accelerator()
 
  # Use GPU if available
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -15,23 +11,13 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
  base_model_name = "microsoft/phi-2"  # Pull from HF Hub directly
  adapter_path = "Shriti09/Microsoft-Phi-QLora"  # Update with your Hugging Face repo path
 
- # Create an offload directory to store the model parts
- offload_dir = "./offload_dir"  # Set offload directory here
-
- # Ensure the offload directory exists
- os.makedirs(offload_dir, exist_ok=True)
-
  print("🔧 Loading base model...")
  # Using the Accelerator to load the model and dispatch to the correct devices
  base_model = AutoModelForCausalLM.from_pretrained(
      base_model_name,
-     device_map="auto",  # Automatically map the model to available devices
      torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
  )
 
- # Ensure the model is offloaded when necessary
- base_model = accelerator.prepare(base_model)
-
  print("🔧 Loading LoRA adapter...")
  adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
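With `device_map="auto"` and `accelerator.prepare(...)` removed, `from_pretrained` loads the weights on CPU, so unless code outside this diff moves them, the model will not actually run on the GPU that `device` selects. A minimal sketch of the post-commit load path under that assumption; the explicit `.to(device)` move is an illustrative addition, not part of the commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

device = "cuda" if torch.cuda.is_available() else "cpu"

base_model_name = "microsoft/phi-2"
adapter_path = "Shriti09/Microsoft-Phi-QLora"

# Without device_map="auto", from_pretrained leaves the model on CPU;
# bf16 on GPU, fp32 on CPU, matching the commit.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)

# Attach the QLoRA adapter, then move the combined model to the target
# device explicitly (this .to(device) is assumed, not shown in the diff).
adapter_model = PeftModel.from_pretrained(base_model, adapter_path).to(device)

tokenizer = AutoTokenizer.from_pretrained(base_model_name)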