# Previous approach: load the full bigcode/starcoder model in fp16.
# Kept for reference; replaced below by a 4-bit quantized 1B variant.
#
# import os
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
#
# # Correct model name
# MODEL_NAME = "bigcode/starcoder"
#
# # Ensure the token is provided
# HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
# if not HF_TOKEN:
#     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")
#
# # Set device
# device = "cuda" if torch.cuda.is_available() else "cpu"
#
# # Load tokenizer with authentication
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
#
# # Load model with optimizations. Note: device_map="auto" already places
# # the weights, so the original trailing .to(device) was redundant (and
# # errors on recent transformers versions when offloading is active).
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     token=HF_TOKEN,
#     torch_dtype=torch.float16,   # Reduce memory usage
#     low_cpu_mem_usage=True,      # Optimize loading
#     device_map="auto",           # Automatic device placement
#     offload_folder="offload",    # Offload to disk if needed
# )
#
# def generate_code(prompt: str, max_tokens: int = 256):
#     """Generates code based on the input prompt."""
#     if not prompt.strip():
#         return "Error: Empty prompt provided."
#     inputs = tokenizer(prompt, return_tensors="pt").to(device)
#     output = model.generate(**inputs, max_new_tokens=max_tokens)
#     return tokenizer.decode(output[0], skip_special_tokens=True)
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "bigcode/starcoderbase-1b"  # Lighter 1B-parameter variant
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# 4-bit NF4 quantization via bitsandbytes to cut the memory footprint;
# compute still runs in fp16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    quantization_config=quant_config,
    device_map="auto",       # Let accelerate place the quantized weights
    trust_remote_code=True,
)
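
# Optional sanity check (illustrative, not part of the original Space):
# confirm where the quantized weights landed and their approximate size.
print(f"Device map: {model.hf_device_map}")
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")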

def generate_code(prompt: str, max_tokens: int = 256):
    """Generate a completion for the given prompt."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)
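
# Example usage, a minimal sketch for local testing: the prompt string and
# token count below are illustrative, not part of the original Space.
if __name__ == "__main__":
    demo_prompt = "def fibonacci(n):"
    print(generate_code(demo_prompt, max_tokens=64))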