sachin
committed on
Commit
·
7222675
1
Parent(s):
453f545
add torch.compile
Browse files- src/server/main.py +3 -0
src/server/main.py
CHANGED
@@ -386,6 +386,9 @@ class TranslateManager:
|
|
386 |
torch_dtype=torch.float16,
|
387 |
attn_implementation="flash_attention_2"
|
388 |
).to(self.device_type)
|
|
|
|
|
|
|
389 |
return tokenizer, model
|
390 |
|
391 |
class ModelManager:
|
|
|
386 |
torch_dtype=torch.float16,
|
387 |
attn_implementation="flash_attention_2"
|
388 |
).to(self.device_type)
|
389 |
+
|
390 |
+
model = torch.compile(model, mode="reduce-overhead")
|
391 |
+
print("Model compiled with torch.compile")
|
392 |
return tokenizer, model
|
393 |
|
394 |
class ModelManager:
|