Plat committed on
Commit
a71047d
·
1 Parent(s): 68a9510

chore: install flash-attn at runtime

Browse files
Files changed (2) hide show
  1. app.py +12 -0
  2. requirements.txt +0 -1
app.py CHANGED
@@ -19,6 +19,18 @@ except:
19
  return lambda x: x
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
23
 
24
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
19
  return lambda x: x
20
 
21
 
22
# Install flash-attn at runtime if it is not already importable.
# The prebuilt wheel is used: FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells the
# flash-attn setup to skip compiling CUDA kernels (the host may lack nvcc).
try:
    import flash_attn  # noqa: F401 -- only the import side effect matters
except ImportError:  # narrow: a bare `except:` would also trap SystemExit/KeyboardInterrupt
    import os
    import subprocess

    subprocess.run(
        # Fixed argv list instead of a shell string: no shell parsing needed.
        ["pip", "install", "flash-attn", "--no-build-isolation"],
        # Merge with the current environment -- passing only the one variable
        # would drop PATH/HOME and can make `pip` unresolvable in the child.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=False,
        check=False,  # best-effort: keep the app booting even if the install fails
    )
32
+
33
+
34
  MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
35
 
36
  quantization_config = BitsAndBytesConfig(load_in_8bit=True)
requirements.txt CHANGED
@@ -4,4 +4,3 @@ accelerate==0.30.1
4
  transformers==4.41.2
5
  spaces==0.28.3
6
  bitsandbytes==0.43.1
7
- flash-attn==2.5.9.post1
 
4
  transformers==4.41.2
5
  spaces==0.28.3
6
  bitsandbytes==0.43.1