Spaces:
Runtime error
Runtime error
0.20 subprocess implementing flash_attn
Browse files
- app.py +2 -1
- requirements.txt +1 -2
app.py
CHANGED
|
@@ -4,12 +4,13 @@ import torch
|
|
| 4 |
import gradio as gr
|
| 5 |
import logging
|
| 6 |
from huggingface_hub import login
|
| 7 |
-
from flash_attn.flash_attention import FlashAttention
|
| 8 |
|
| 9 |
import os
|
| 10 |
import traceback
|
| 11 |
|
| 12 |
from threading import Thread
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Status: Breaks during generation
|
| 15 |
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import logging
|
| 6 |
from huggingface_hub import login
|
|
|
|
| 7 |
|
| 8 |
import os
|
| 9 |
import traceback
|
| 10 |
|
| 11 |
from threading import Thread
|
| 12 |
+
import subprocess
|
| 13 |
+
subprocess.run('pip install -U flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 14 |
|
| 15 |
# Status: Breaks during generation
|
| 16 |
|
requirements.txt
CHANGED
|
@@ -5,5 +5,4 @@ accelerate==0.33.0
|
|
| 5 |
sentencepiece==0.2.0
|
| 6 |
spaces==0.29.2
|
| 7 |
gradio==4.39.0
|
| 8 |
-
bitsandbytes==0.43.2
|
| 9 |
-
flash-attn
|
|
|
|
| 5 |
sentencepiece==0.2.0
|
| 6 |
spaces==0.29.2
|
| 7 |
gradio==4.39.0
|
| 8 |
+
bitsandbytes==0.43.2
|
|
|