arya-ai-model committed
Commit b2c1928 · 1 Parent(s): f279b31

updated model.py

Files changed (1)
  model.py  +9 -49
model.py CHANGED
@@ -1,63 +1,23 @@
-# import os
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import torch
-
-# # Correct model name
-# MODEL_NAME = "bigcode/starcoder"
-
-# # Ensure the token is provided
-# HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-# if not HF_TOKEN:
-#     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")
-
-# # Set device
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# # Load tokenizer with authentication
-# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-
-# # Load model with optimizations
-# model = AutoModelForCausalLM.from_pretrained(
-#     MODEL_NAME,
-#     token=HF_TOKEN,
-#     torch_dtype=torch.float16,  # Reduce memory usage
-#     low_cpu_mem_usage=True,  # Optimize loading
-#     device_map="auto",  # Automatic device placement
-#     offload_folder="offload"  # Offload to disk if needed
-# ).to(device)
-
-# def generate_code(prompt: str, max_tokens: int = 256):
-#     """Generates code based on the input prompt."""
-#     if not prompt.strip():
-#         return "Error: Empty prompt provided."
-
-#     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-#     output = model.generate(**inputs, max_new_tokens=max_tokens)
-#     return tokenizer.decode(output[0], skip_special_tokens=True)
-
 import os
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-MODEL_NAME = "bigcode/starcoderbase-3b"  # Lighter version
+MODEL_NAME = "bigcode/starcoderbase-1b"
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
+# Force CPU mode
+device = "cpu"  # Change this from "cuda"
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     token=HF_TOKEN,
-    quantization_config=quant_config,
-    device_map="auto",
+    torch_dtype=torch.float16,  # Keep memory low
+    device_map="auto",  # Still allows auto placement
     trust_remote_code=True
-)
+).to(device)
 
 def generate_code(prompt: str, max_tokens: int = 256):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
     output = model.generate(**inputs, max_new_tokens=max_tokens)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
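
Below is a minimal usage sketch of the updated module, not part of the commit. It assumes HUGGINGFACE_TOKEN is already exported in the environment and that model.py is importable from the working directory; the prompt and token limit are illustrative. Generation on CPU with the 1B model should work, but will be slow.

# Minimal usage sketch (assumption: HUGGINGFACE_TOKEN is set before import,
# since model.py loads the tokenizer and model at import time).
from model import generate_code

# Illustrative prompt; max_tokens caps the number of newly generated tokens.
completion = generate_code("def fibonacci(n):", max_tokens=64)
print(completion)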