ginipick committed
Commit cb17632 · verified · 1 Parent(s): 5d47f79

Update app.py

Files changed (1)
  1. app.py +15 -11
app.py CHANGED
@@ -154,18 +154,21 @@ def install_flash_attn():
 
         logging.info(f"Detected CUDA version: {cuda_version}")
 
-        # CUDA 11.8 specific wheel for Python 3.10
-        if cuda_version.startswith("11.8"):
+        # Select the wheel file by CUDA version
+        if cuda_version.startswith("12.1"):
+            flash_attn_url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.5/flash_attn-2.7.5+cu121torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
+        elif cuda_version.startswith("11.8"):
             flash_attn_url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu11torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
-            subprocess.run(
-                ["pip", "install", flash_attn_url],
-                check=True,
-                capture_output=True
-            )
         else:
             logging.warning(f"Unsupported CUDA version: {cuda_version}, skipping flash-attn installation")
             return False
 
+        subprocess.run(
+            ["pip", "install", flash_attn_url],
+            check=True,
+            capture_output=True
+        )
+
         logging.info("flash-attn installed successfully!")
         return True
     except Exception as e:
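
The first hunk widens wheel selection from a single CUDA 11.8 branch to a per-version lookup (adding a CUDA 12.1 wheel) and hoists the pip install out of the branch so every supported version shares it. Below is a minimal sketch of that pattern; how cuda_version is detected is not visible in this hunk, so torch.version.cuda, the FLASH_ATTN_WHEELS table, and the except-handler body are illustrative assumptions rather than the repository's code.

import logging
import subprocess
import sys

# Assumed wheel table mirroring the URLs added in this commit.
FLASH_ATTN_WHEELS = {
    "12.1": "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.5/flash_attn-2.7.5+cu121torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl",
    "11.8": "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu11torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl",
}

def install_flash_attn() -> bool:
    try:
        import torch  # assumption: torch is already installed in the Space
        cuda_version = torch.version.cuda or ""
        logging.info(f"Detected CUDA version: {cuda_version}")

        # Pick the prebuilt wheel whose CUDA prefix matches the detected version.
        wheel_url = next(
            (url for prefix, url in FLASH_ATTN_WHEELS.items()
             if cuda_version.startswith(prefix)),
            None,
        )
        if wheel_url is None:
            logging.warning(f"Unsupported CUDA version: {cuda_version}, skipping flash-attn installation")
            return False

        # Install with the same interpreter that is running this script.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", wheel_url],
            check=True,
            capture_output=True,
        )
        logging.info("flash-attn installed successfully!")
        return True
    except Exception as e:
        # The handler body lies outside this hunk; a simple fallback is assumed here.
        logging.warning(f"flash-attn installation failed: {e}")
        return False
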
@@ -437,11 +440,12 @@ Stay with me forever, let our love just flow
     )
 
     # Run with the server settings
-    demo.queue(concurrency_count=2).launch(
+    # Run with the server settings
+    demo.queue(max_size=20).launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=True,
-        enable_queue=True,
         show_api=True,
-        show_error=True
-    )
+        show_error=True,
+        concurrency_count=2  # moved to a launch() parameter instead of queue()
+    )
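
The second hunk drops the enable_queue flag, bounds the request queue with max_size=20, and relocates concurrency_count from queue() to launch(). Whether launch() accepts that keyword depends on the installed Gradio version (3.x took concurrency_count on queue(); newer releases expose default_concurrency_limit on queue() instead), so the sketch below sticks to arguments available across recent releases; the placeholder Blocks app and its Markdown component are assumptions standing in for the real UI in app.py.

import gradio as gr

# Placeholder Blocks app standing in for the UI defined earlier in app.py.
with gr.Blocks() as demo:
    gr.Markdown("demo placeholder")

# Cap how many requests may wait in the queue, then start the server.
demo.queue(max_size=20)
demo.launch(
    server_name="0.0.0.0",  # listen on all interfaces
    server_port=7860,
    share=True,             # request a public share link
    show_api=True,
    show_error=True,
)
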