Spaces:
Building
on
L40S
Building
on
L40S
Update app.py
Browse files
app.py
CHANGED
@@ -154,27 +154,42 @@ def install_flash_attn():
|
|
154 |
|
155 |
logging.info(f"Detected CUDA version: {cuda_version}")
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
logging.warning(f"Unsupported CUDA version: {cuda_version}, skipping flash-attn installation")
|
164 |
-
return False
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
-
logging.info("flash-attn installed successfully!")
|
173 |
-
return True
|
174 |
except Exception as e:
|
175 |
logging.warning(f"Failed to install flash-attn: {e}")
|
176 |
return False
|
177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
def initialize_system():
|
179 |
optimize_gpu_settings()
|
180 |
has_flash_attn = install_flash_attn()
|
|
|
154 |
|
155 |
logging.info(f"Detected CUDA version: {cuda_version}")
|
156 |
|
157 |
+
try:
|
158 |
+
import flash_attn
|
159 |
+
logging.info("flash-attn already installed")
|
160 |
+
return True
|
161 |
+
except ImportError:
|
162 |
+
logging.info("Installing flash-attn...")
|
|
|
|
|
163 |
|
164 |
+
# Try a direct install for CUDA 12.1
|
165 |
+
try:
|
166 |
+
subprocess.run(
|
167 |
+
["pip", "install", "flash-attn", "--no-build-isolation"],
|
168 |
+
check=True,
|
169 |
+
capture_output=True
|
170 |
+
)
|
171 |
+
logging.info("flash-attn installed successfully!")
|
172 |
+
return True
|
173 |
+
except subprocess.CalledProcessError:
|
174 |
+
logging.warning("Failed to install flash-attn via pip, skipping...")
|
175 |
+
return False
|
176 |
|
|
|
|
|
177 |
except Exception as e:
|
178 |
logging.warning(f"Failed to install flash-attn: {e}")
|
179 |
return False
|
180 |
|
181 |
+
# ... (the rest of the code is unchanged) ...
|
182 |
+
|
183 |
+
# Only the launch section is modified, to apply the server settings
|
184 |
+
demo.queue(max_size=20).launch(
|
185 |
+
server_name="0.0.0.0",
|
186 |
+
server_port=7860,
|
187 |
+
share=True,
|
188 |
+
show_api=True,
|
189 |
+
show_error=True,
|
190 |
+
max_threads=2 # use max_threads instead of concurrency_count
|
191 |
+
)
|
192 |
+
|
193 |
def initialize_system():
|
194 |
optimize_gpu_settings()
|
195 |
has_flash_attn = install_flash_attn()
|