dwb2023 committed (verified)
Commit bf0b05e · Parent(s): cdf4c44

Update app.py
Files changed (1):
  1. app.py  +8 -0
app.py CHANGED
@@ -11,6 +11,14 @@ from transformers import (
     TextIteratorStreamer,
     LlamaTokenizer,
 )
+import subprocess
+
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+
 
 MAX_MAX_NEW_TOKENS = 1024
 DEFAULT_MAX_NEW_TOKENS = 50
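
Note on the change: the FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE environment variable passed to the pip command tells flash-attn's setup.py to skip compiling its CUDA kernels from source at install time, which keeps the Space's startup cost down. The install only takes effect if the model is loaded with Flash Attention 2 enabled; that call is outside this hunk, so the snippet below is a minimal sketch of the typical transformers pattern, with a placeholder model id that is not taken from this commit.

# Sketch only: how a transformers model typically opts into the flash-attn
# package installed above. The model id is hypothetical; the actual model
# used by app.py is not visible in this diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-2-7b-chat-hf"  # placeholder, for illustration

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,                # flash-attn requires fp16/bf16
    attn_implementation="flash_attention_2",  # uses the runtime-installed wheel
    device_map="auto",
)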