Update app.py
Removed flash attention-2
app.py
CHANGED
@@ -18,7 +18,7 @@ try:
         torch_dtype="auto",
         trust_remote_code=True,
         use_cache=True,  # Enable KV caching
-        attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
+        # attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
     )
 
 except Exception as e:
@@ -49,8 +49,8 @@ def generate_response(user_message, history):
         return_tensors="pt",
         truncation=True,
         max_length=2048,
-        padding=True,
-        return_attention_mask=True
+        # padding=True,
+        # return_attention_mask=True
     ).to(device)
 
     with torch.no_grad():
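For context, a minimal sketch of the pattern this commit steps back from: requesting FlashAttention-2 at load time and falling back to the default attention implementation when the flash-attn package (or supporting hardware) is unavailable. The model id, prompt, and generation settings below are placeholders, not taken from the Space; only the arguments that appear in the diff are mirrored.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-org/your-model"  # hypothetical; the Space's model id is not visible in this diff

device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    # Ask for FlashAttention-2 explicitly; from_pretrained raises if
    # flash-attn is not installed or the hardware cannot use it.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        trust_remote_code=True,
        use_cache=True,  # Enable KV caching
        attn_implementation="flash_attention_2",
    )
except Exception:
    # Fall back to the library's default attention, which is what
    # commenting out the argument (as this commit does) achieves.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        trust_remote_code=True,
        use_cache=True,  # Enable KV caching
    )
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Tokenization mirroring the second hunk. padding=True and
# return_attention_mask=True are redundant for a single sequence:
# the tokenizer returns an attention mask by default, and a lone
# prompt needs no padding, which may be why they were commented out.
inputs = tokenizer(
    "Hello!",
    return_tensors="pt",
    truncation=True,
    max_length=2048,
).to(device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Either path leaves generation unchanged; the commit simply drops the hard dependency on flash-attn, so the Space can also build and run on hardware where FlashAttention-2 is not supported.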