moondream2-batch-processing

Runtime error

Csplk committed on Jan 13

Commit

39c9043

1 Parent(s): b0bb062

fixed flash attention

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,7 +16,6 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
-    attn_implementation="flash_attention_2"
 )
 moondream.eval()

 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
 )
 moondream.eval()