yhzx233 committed on
Commit
8a472c5
·
1 Parent(s): 7ddb63f

feat: use static kv cache

Browse files
Files changed (1) hide show
  1. generation_utils.py +1 -0
generation_utils.py CHANGED
@@ -375,6 +375,7 @@ def process_batch(batch_items, tokenizer, model, spt, device, system_prompt, sta
375
  outputs = model.generate(
376
  input_ids=input_ids,
377
  attention_mask=attention_mask,
 
378
  )
379
  print(f"Original outputs shape: {outputs.shape}")
380
  print(f"Start value: {start}")
 
375
  outputs = model.generate(
376
  input_ids=input_ids,
377
  attention_mask=attention_mask,
378
+ cache_implementation="static",
379
  )
380
  print(f"Original outputs shape: {outputs.shape}")
381
  print(f"Start value: {start}")