Commit: "Update app.py"
Browse files
app.py — CHANGED
@@ -95,14 +95,14 @@ model = AutoModelForCausalLM.from_pretrained(

Previous version (removed lines marked with "-"):

 95        "m-a-p/YuE-s1-7B-anneal-en-cot",
 96        torch_dtype=torch.float16,
 97        attn_implementation="flash_attention_2",  # To enable flashattn, you have to install flash-attn
 98  -  )
 99  -  model.to(device)
100     model.eval()
101
102     # Apply dynamic quantization
103     model = torch.quantization.quantize_dynamic(
104         model, {torch.nn.Linear}, dtype=torch.qint8
105  -  )
106
107     basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
108     resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
|
|
New version (added lines marked with "+"):

 95        "m-a-p/YuE-s1-7B-anneal-en-cot",
 96        torch_dtype=torch.float16,
 97        attn_implementation="flash_attention_2",  # To enable flashattn, you have to install flash-attn
 98  +  ).to(device)
 99     model.eval()
100
101     # Apply dynamic quantization
102     model = torch.quantization.quantize_dynamic(
103         model, {torch.nn.Linear}, dtype=torch.qint8
104  +  ).to(device)
105  +  model.eval()
106
107     basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
108     resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'