KingNish committed on
Commit
2936f7d
·
verified ·
1 Parent(s): 0eb082d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -14,9 +14,7 @@ print("Installing flash-attn...")
14
  subprocess.run(
15
  "pip install flash-attn --no-build-isolation",
16
  env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
17
- shell=True,
18
- capture_output=True, # Capture output for debugging
19
- text=True # Decode output as text
20
  )
21
 
22
  from huggingface_hub import snapshot_download
@@ -76,11 +74,9 @@ device = "cuda:0"
76
 
77
  model = AutoModelForCausalLM.from_pretrained(
78
  "m-a-p/YuE-s1-7B-anneal-en-cot",
79
- torch_dtype=torch.bfloat16,
80
  attn_implementation="flash_attention_2",
81
- # quantization_config=quantization_config,
82
  low_cpu_mem_usage=True,
83
- # device_map="auto"
84
  ).to(device)
85
  model.eval()
86
 
 
14
  subprocess.run(
15
  "pip install flash-attn --no-build-isolation",
16
  env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
17
+ shell=True
 
 
18
  )
19
 
20
  from huggingface_hub import snapshot_download
 
74
 
75
  model = AutoModelForCausalLM.from_pretrained(
76
  "m-a-p/YuE-s1-7B-anneal-en-cot",
77
+ torch_dtype=torch.float16,
78
  attn_implementation="flash_attention_2",
 
79
  low_cpu_mem_usage=True,
 
80
  ).to(device)
81
  model.eval()
82