Spaces:
Running
on
Zero
Running
on
Zero
fixed flash attention
Browse files
app.py
CHANGED
@@ -16,7 +16,6 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
|
|
16 |
moondream = AutoModelForCausalLM.from_pretrained(
|
17 |
model_id, trust_remote_code=True, revision=revision,
|
18 |
torch_dtype=torch.bfloat16, device_map={"": "cuda"},
|
19 |
-
attn_implementation="flash_attention_2"
|
20 |
)
|
21 |
|
22 |
moondream.eval()
|
|
|
16 |
moondream = AutoModelForCausalLM.from_pretrained(
|
17 |
model_id, trust_remote_code=True, revision=revision,
|
18 |
torch_dtype=torch.bfloat16, device_map={"": "cuda"},
|
|
|
19 |
)
|
20 |
|
21 |
moondream.eval()
|