Jeff Myers II committed
Commit 5278642 · 1 Parent(s): a4a81f7

Enabling 8-bit quantization broke the model. Attempting to fix by removing 'pad_token_id' arg from AutoModelForCausalLM.from_pretrained

Files changed (1):
  Gemma.py +1 -1
Gemma.py CHANGED
@@ -12,7 +12,7 @@ class GemmaLLM:
         login(token=os.environ.get("GEMMA_TOKEN"))

         model_id = "google/gemma-3-4b-it"
-        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, pad_token_id=0)
+        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         self.model = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16)
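For context, a minimal sketch of how this load path might look on recent transformers releases, where 8-bit loading is requested through BitsAndBytesConfig (the bare load_in_8bit=True kwarg is deprecated there) and a pad token, if one is needed at all, is set on the tokenizer rather than passed to from_pretrained. The model id, GEMMA_TOKEN variable, and pipeline call come from the diff; the version, device_map, and pad-token handling are assumptions about the environment (bitsandbytes installed, a CUDA device available), not the author's method.

import os
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Gated repo: authenticate with the token from the environment, as in Gemma.py.
login(token=os.environ.get("GEMMA_TOKEN"))

model_id = "google/gemma-3-4b-it"

# Request 8-bit weights via BitsAndBytesConfig instead of the deprecated
# load_in_8bit kwarg; requires the bitsandbytes package and a CUDA device.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",  # assumption: let accelerate place the quantized weights
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# If a pad token is needed (e.g. for batched generation), set it on the
# tokenizer rather than overriding the model config in from_pretrained,
# which is the kwarg this commit removes.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# The 8-bit model already fixes its compute dtype, so no torch_dtype
# override is passed to the pipeline here.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)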