Update app.py
Browse files
app.py
CHANGED
@@ -97,12 +97,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
97 |
attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
|
98 |
).to(device)
|
99 |
model.eval()
|
100 |
-
|
101 |
-
# Apply dynamic quantization
|
102 |
-
model = torch.quantization.quantize_dynamic(
|
103 |
-
model, {torch.nn.Linear}, dtype=torch.qint8
|
104 |
-
).to(device)
|
105 |
-
model.eval()
|
106 |
|
107 |
basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
|
108 |
resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
|
|
|
97 |
attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
|
98 |
).to(device)
|
99 |
model.eval()
|
100 |
+
# TODO: use either GGUF or vLLM here later instead of dynamic quantization
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
|
103 |
resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
|