Update app.py
app.py CHANGED
@@ -33,14 +33,11 @@ huggingface_hub.login(token=LLama)
 MODEL_ID = "meta-llama/Llama-2-7b-hf"
 tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
-#
-use_flash_attention = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
-attn_implementation = "flash_attention_2" if use_flash_attention else "eager"  # Default to eager if no compatible GPU
+# Load model with default attention mechanism (no Flash Attention)
 model = LlamaForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    attn_implementation=attn_implementation,
     load_in_8bit=True
 )
 
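For reference, a minimal self-contained sketch of the loading path after this change, assuming transformers, accelerate, and bitsandbytes are installed and that huggingface_hub.login has already been called with a token that has Llama 2 access, as in the surrounding app.py:

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

MODEL_ID = "meta-llama/Llama-2-7b-hf"

tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Default attention implementation (no Flash Attention); load_in_8bit=True
# quantizes the weights via bitsandbytes to reduce GPU memory usage.
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    load_in_8bit=True,
)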