# Source: Hugging Face repo file by keerthanaBasavaraj
# Commit 749ca16 — "add cpu bitsandbytes" (975 bytes)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
def load_model(model_name="chatdb/natural-sql-7b", load_in_8bit=True, llm_int8_threshold=6.0):
    """
    Load a causal-LM SQL generation model with bitsandbytes 8-bit quantization.

    Args:
        model_name: Hugging Face model id or local path to load the tokenizer
            and model from.
        load_in_8bit: Whether to quantize model weights to int8 on load
            (previously hard-coded to True; default preserves old behavior).
        llm_int8_threshold: Outlier threshold for the bitsandbytes int8
            matmul (previously hard-coded to 6.0).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Quantization options are now parameters so callers can tune them
    # without editing this module; defaults match the original values.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=load_in_8bit,
        llm_int8_threshold=llm_int8_threshold,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name, quantization_config=quantization_config
    )
    return tokenizer, model
def generate_sql(question, prompt_inputs, tokenizer, model, device="cpu"):
    """
    Generate an SQL query from a pre-formatted prompt.

    Args:
        question: The user's natural-language question.
            NOTE(review): currently unused — the formatted prompt in
            ``prompt_inputs`` presumably already embeds it; confirm with callers.
        prompt_inputs: Mapping that must contain a ``"formatted_prompt"`` string.
        tokenizer: Tokenizer paired with ``model``.
        model: Causal LM used for generation.
        device: Device the encoded inputs are moved to (default ``"cpu"``).

    Returns:
        The decoded model output (special tokens stripped).
    """
    formatted = prompt_inputs["formatted_prompt"]
    encoded = tokenizer(formatted, return_tensors="pt").to(device)
    generated = model.generate(**encoded, max_new_tokens=128)
    return tokenizer.decode(generated[0], skip_special_tokens=True)