Plat committed · Commit 41b1248 · 1 Parent(s): a71047d

chore: import flash-attn first

Files changed (1)
  1. app.py +11 -12
app.py CHANGED
@@ -1,3 +1,14 @@
+try:
+    import flash_attn
+except:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+
 import torch
 from transformers import (
     AutoModelForCausalLM,
@@ -19,18 +30,6 @@ except:
     return lambda x: x
 
 
-try:
-    import flash_attn
-except:
-    import subprocess
-
-    subprocess.run(
-        "pip install flash-attn --no-build-isolation",
-        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-        shell=True,
-    )
-
-
 MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
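
For context, a minimal sketch of the top of app.py after this commit, reconstructed from the hunks above. The placement of BitsAndBytesConfig in the import block and the elided remainder of the file are assumptions, not visible in this diff:

# Sketch only: the flash-attn bootstrap now runs before torch/transformers are
# imported, presumably so flash_attn is already installed by the time anything
# that might need it is loaded.
try:
    import flash_attn
except:
    import subprocess

    # FLASH_ATTENTION_SKIP_CUDA_BUILD tells flash-attn's setup to skip the local
    # CUDA build, so the install relies on a prebuilt wheel instead of compiling.
    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
    )

import torch
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,  # assumed to live in this block; its use is visible below
    # ... remaining imports not shown in this diff
)

MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"

quantization_config = BitsAndBytesConfig(load_in_8bit=True)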