# Earlier full-precision loading of the larger StarCoder model, kept commented
# out for reference; the active code below switches to a smaller 4-bit-quantized
# checkpoint.
# import os
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# # Correct model name
# MODEL_NAME = "bigcode/starcoder"
# # Ensure the token is provided
# HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
# if not HF_TOKEN:
#     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")
# # Set device
# device = "cuda" if torch.cuda.is_available() else "cpu"
# # Load tokenizer with authentication
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
# # Load model with optimizations
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     token=HF_TOKEN,
#     torch_dtype=torch.float16,   # Reduce memory usage
#     low_cpu_mem_usage=True,      # Optimize loading
#     device_map="auto",           # Automatic device placement
#     offload_folder="offload"     # Offload to disk if needed
# ).to(device)
# def generate_code(prompt: str, max_tokens: int = 256):
#     """Generates code based on the input prompt."""
#     if not prompt.strip():
#         return "Error: Empty prompt provided."
#     inputs = tokenizer(prompt, return_tensors="pt").to(device)
#     output = model.generate(**inputs, max_new_tokens=max_tokens)
#     return tokenizer.decode(output[0], skip_special_tokens=True)
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "bigcode/starcoderbase-3b"  # Lighter StarCoder variant
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")

# 4-bit NF4 quantization to keep memory usage low; compute runs in float16
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    quantization_config=quant_config,
    device_map="auto",        # Automatic device placement
    trust_remote_code=True
)

def generate_code(prompt: str, max_tokens: int = 256):
    """Generates code based on the input prompt."""
    if not prompt.strip():
        return "Error: Empty prompt provided."
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)
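
# Minimal usage sketch: calls generate_code() directly with an arbitrary sample
# prompt (the prompt text is just an illustrative placeholder, not from the
# original file). In a deployed Space this function would more likely be wired
# to a web UI; the __main__ guard simply allows a quick local smoke test.
if __name__ == "__main__":
    sample_prompt = "def fibonacci(n):"
    print(generate_code(sample_prompt, max_tokens=128))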