Plat committed · 41b1248
Parent(s): a71047d
chore: import flash-attn first
app.py CHANGED
@@ -1,3 +1,14 @@
+try:
+    import flash_attn
+except:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+
 import torch
 from transformers import (
     AutoModelForCausalLM,
@@ -19,18 +30,6 @@ except:
         return lambda x: x
 
 
-try:
-    import flash_attn
-except:
-    import subprocess
-
-    subprocess.run(
-        "pip install flash-attn --no-build-isolation",
-        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-        shell=True,
-    )
-
-
 MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
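For context, a hedged reading of why this reorder matters: transformers selects its attention backend when the model is constructed, so flash_attn has to be importable (installing it on first failure, as the block above does) before the torch/transformers imports and the model load run. The sketch below shows how a Space like this one might load the model once flash_attn is available; the attn_implementation, device_map, and tokenizer lines are illustrative assumptions, not part of this commit.

# Minimal sketch, assuming the Space opts into the flash-attention backend.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
    # Assumption: only works if flash_attn imported cleanly above.
    attn_implementation="flash_attention_2",
)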