rrevo committed on
Commit
d57e49b
·
1 Parent(s): 03ccab8
Files changed (3) hide show
  1. run-server-prod.sh +4 -0
  2. server/pdm.lock +48 -1
  3. server/pyproject.toml +2 -0
run-server-prod.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ cd server
2
+
3
+ DEVICE=cuda:0 ATTN_IMPLEMENTATION=flash_attention_2 ./.venv/bin/uvicorn src.main:app --reload --port 3535
4
+ cd -
server/pdm.lock CHANGED
@@ -5,7 +5,7 @@
5
  groups = ["default"]
6
  strategy = ["cross_platform"]
7
  lock_version = "4.4"
8
- content_hash = "sha256:29fec1d6f8c4bac2b381a864faff2fd0e5f2c130535ff42ba32e01ddd9da623b"
9
 
10
  [[package]]
11
  name = "accelerate"
@@ -123,6 +123,16 @@ files = [
123
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
124
  ]
125
 
 
 
 
 
 
 
 
 
 
 
126
  [[package]]
127
  name = "fastapi"
128
  version = "0.109.0"
@@ -148,6 +158,21 @@ files = [
148
  {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
149
  ]
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  [[package]]
152
  name = "fsspec"
153
  version = "2023.12.2"
@@ -281,6 +306,28 @@ files = [
281
  {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"},
282
  ]
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  [[package]]
285
  name = "numpy"
286
  version = "1.26.3"
 
5
  groups = ["default"]
6
  strategy = ["cross_platform"]
7
  lock_version = "4.4"
8
+ content_hash = "sha256:d8c1f37f03db3efb198648ac89a26e779be2da38b5818b926eef4c9046dc2c64"
9
 
10
  [[package]]
11
  name = "accelerate"
 
123
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
124
  ]
125
 
126
+ [[package]]
127
+ name = "einops"
128
+ version = "0.7.0"
129
+ requires_python = ">=3.8"
130
+ summary = "A new flavour of deep learning operations"
131
+ files = [
132
+ {file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"},
133
+ {file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"},
134
+ ]
135
+
136
  [[package]]
137
  name = "fastapi"
138
  version = "0.109.0"
 
158
  {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
159
  ]
160
 
161
+ [[package]]
162
+ name = "flash-attn"
163
+ version = "2.4.2"
164
+ requires_python = ">=3.7"
165
+ summary = ""
166
+ dependencies = [
167
+ "einops",
168
+ "ninja",
169
+ "packaging",
170
+ "torch",
171
+ ]
172
+ files = [
173
+ {file = "flash_attn-2.4.2.tar.gz", hash = "sha256:eb822a8c4219b610e9d734cbc8cd9ee4547f27433815a2b90dc1462766feefc1"},
174
+ ]
175
+
176
  [[package]]
177
  name = "fsspec"
178
  version = "2023.12.2"
 
306
  {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"},
307
  ]
308
 
309
+ [[package]]
310
+ name = "ninja"
311
+ version = "1.11.1.1"
312
+ summary = "Ninja is a small build system with a focus on speed"
313
+ files = [
314
+ {file = "ninja-1.11.1.1-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:376889c76d87b95b5719fdd61dd7db193aa7fd4432e5d52d2e44e4c497bdbbee"},
315
+ {file = "ninja-1.11.1.1-py2.py3-none-manylinux1_i686.manylinux_2_5_i686.whl", hash = "sha256:ecf80cf5afd09f14dcceff28cb3f11dc90fb97c999c89307aea435889cb66877"},
316
+ {file = "ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b"},
317
+ {file = "ninja-1.11.1.1-py2.py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:73b93c14046447c7c5cc892433d4fae65d6364bec6685411cb97a8bcf815f93a"},
318
+ {file = "ninja-1.11.1.1-py2.py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:18302d96a5467ea98b68e1cae1ae4b4fb2b2a56a82b955193c637557c7273dbd"},
319
+ {file = "ninja-1.11.1.1-py2.py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:aad34a70ef15b12519946c5633344bc775a7656d789d9ed5fdb0d456383716ef"},
320
+ {file = "ninja-1.11.1.1-py2.py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:d491fc8d89cdcb416107c349ad1e3a735d4c4af5e1cb8f5f727baca6350fdaea"},
321
+ {file = "ninja-1.11.1.1-py2.py3-none-musllinux_1_1_i686.whl", hash = "sha256:7563ce1d9fe6ed5af0b8dd9ab4a214bf4ff1f2f6fd6dc29f480981f0f8b8b249"},
322
+ {file = "ninja-1.11.1.1-py2.py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:9df724344202b83018abb45cb1efc22efd337a1496514e7e6b3b59655be85205"},
323
+ {file = "ninja-1.11.1.1-py2.py3-none-musllinux_1_1_s390x.whl", hash = "sha256:3e0f9be5bb20d74d58c66cc1c414c3e6aeb45c35b0d0e41e8d739c2c0d57784f"},
324
+ {file = "ninja-1.11.1.1-py2.py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:76482ba746a2618eecf89d5253c0d1e4f1da1270d41e9f54dfbd91831b0f6885"},
325
+ {file = "ninja-1.11.1.1-py2.py3-none-win32.whl", hash = "sha256:fa2ba9d74acfdfbfbcf06fad1b8282de8a7a8c481d9dee45c859a8c93fcc1082"},
326
+ {file = "ninja-1.11.1.1-py2.py3-none-win_amd64.whl", hash = "sha256:95da904130bfa02ea74ff9c0116b4ad266174fafb1c707aa50212bc7859aebf1"},
327
+ {file = "ninja-1.11.1.1-py2.py3-none-win_arm64.whl", hash = "sha256:185e0641bde601e53841525c4196278e9aaf4463758da6dd1e752c0a0f54136a"},
328
+ {file = "ninja-1.11.1.1.tar.gz", hash = "sha256:9d793b08dd857e38d0b6ffe9e6b7145d7c485a42dcfea04905ca0cdb6017cc3c"},
329
+ ]
330
+
331
  [[package]]
332
  name = "numpy"
333
  version = "1.26.3"
server/pyproject.toml CHANGED
@@ -10,6 +10,8 @@ dependencies = [
10
  "uvicorn[standard]>=0.25.0",
11
  "numpy>=1.26.3",
12
  "transformers[torch]>=4.36.2",
 
 
13
  ]
14
  requires-python = ">=3.11"
15
  readme = "README.md"
 
10
  "uvicorn[standard]>=0.25.0",
11
  "numpy>=1.26.3",
12
  "transformers[torch]>=4.36.2",
13
+ "ninja>=1.11.1.1",
14
+ "flash-attn>=2.4.2",
15
  ]
16
  requires-python = ">=3.11"
17
  readme = "README.md"