feat: use static kv cache
Browse files
- generation_utils.py +1 -0
generation_utils.py
CHANGED
@@ -375,6 +375,7 @@ def process_batch(batch_items, tokenizer, model, spt, device, system_prompt, sta
|
|
375 |
outputs = model.generate(
|
376 |
input_ids=input_ids,
|
377 |
attention_mask=attention_mask,
|
|
|
378 |
)
|
379 |
print(f"Original outputs shape: {outputs.shape}")
|
380 |
print(f"Start value: {start}")
|
|
|
375 |
outputs = model.generate(
|
376 |
input_ids=input_ids,
|
377 |
attention_mask=attention_mask,
|
378 |
+
cache_implementation="static",
|
379 |
)
|
380 |
print(f"Original outputs shape: {outputs.shape}")
|
381 |
print(f"Start value: {start}")
|