Update app.py
Removed flash attention-2
app.py
CHANGED
@@ -18,7 +18,7 @@ try:
         torch_dtype="auto",
         trust_remote_code=True,
         use_cache=True,  # Enable KV caching
-        attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
+        # attn_implementation="flash_attention_2" if torch.cuda.is_available() else None
     )
 
 except Exception as e:
@@ -49,8 +49,8 @@ def generate_response(user_message, history):
         return_tensors="pt",
         truncation=True,
         max_length=2048,
-        padding=True,
-        return_attention_mask=True
+        # padding=True,
+        # return_attention_mask=True
     ).to(device)
 
     with torch.no_grad():
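For context, a minimal sketch of the pattern this commit steps back from: requesting FlashAttention-2 at load time and falling back to the default attention implementation when the flash-attn package (or supporting hardware) is unavailable. The model id, prompt, and generation settings below are placeholders, not taken from the Space; only the arguments that appear in the diff are mirrored.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-org/your-model"  # hypothetical; the Space's model id is not visible in this diff

device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    # Ask for FlashAttention-2 explicitly; from_pretrained raises if
    # flash-attn is not installed or the hardware cannot use it.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        trust_remote_code=True,
        use_cache=True,  # Enable KV caching
        attn_implementation="flash_attention_2",
    )
except Exception:
    # Fall back to the library's default attention, which is what
    # commenting out the argument (as this commit does) achieves.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        trust_remote_code=True,
        use_cache=True,  # Enable KV caching
    )
model.to(device)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Tokenization mirroring the second hunk. padding=True and
# return_attention_mask=True are redundant for a single sequence:
# the tokenizer returns an attention mask by default, and a lone
# prompt needs no padding, which may be why they were commented out.
inputs = tokenizer(
    "Hello!",
    return_tensors="pt",
    truncation=True,
    max_length=2048,
).to(device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Either path leaves generation unchanged; the commit simply drops the hard dependency on flash-attn, so the Space can also build and run on hardware where FlashAttention-2 is not supported.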