chegde committed (verified)
Commit: f9a267b
Parent: ab95480

Update app.py


Removed flash attention 2

Files changed (1):
  1. app.py +3 -3
app.py CHANGED
@@ -18,7 +18,7 @@ try:
         torch_dtype="auto",
         trust_remote_code=True,
         use_cache=True,  # Enable KV caching
-        attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
+        # attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
     )
 
 except Exception as e:
@@ -49,8 +49,8 @@ def generate_response(user_message, history):
         return_tensors="pt",
         truncation=True,
         max_length=2048,
-        padding=True,
-        return_attention_mask=True
+        # padding=True,
+        # return_attention_mask=True
     ).to(device)
 
     with torch.no_grad():
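
For context, a minimal, self-contained sketch of how the load and tokenize calls look after this commit, assembled from the hunks above plus standard transformers APIs. The model name, prompt, and max_new_tokens value are hypothetical placeholders, not taken from the repository; only the keyword arguments (and the lines now commented out) mirror the diff.

# Sketch only: placeholder model name and prompt; keyword arguments follow the diff above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "your-org/your-model"  # placeholder; app.py defines the real checkpoint

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    trust_remote_code=True,
    use_cache=True,  # Enable KV caching
    # attn_implementation="flash_attention_2"  # removed by this commit; needs the flash-attn package and a supported GPU
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

inputs = tokenizer(
    "Hello!",  # placeholder prompt
    return_tensors="pt",
    truncation=True,
    max_length=2048,
    # padding=True,                # removed by this commit
    # return_attention_mask=True   # removed by this commit
).to(device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)  # max_new_tokens is a placeholder

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))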