mmcgovern574 committed on
Commit
3c0f01f
·
verified ·
1 Parent(s): 4487681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -85,7 +85,7 @@ model = AutoModelForCausalLM.from_pretrained(
85
  MODEL_ID,
86
  device_map="auto",
87
  quantization_config=quantization_config,
88
- use_flash_attention_2=True, # Enable Flash Attention 2 for better performance
89
  torch_dtype=torch.bfloat16
90
  )
91
 
 
85
  MODEL_ID,
86
  device_map="auto",
87
  quantization_config=quantization_config,
88
+ attn_implementation="flash_attention_2", # Updated Flash Attention 2 parameter
89
  torch_dtype=torch.bfloat16
90
  )
91