KingNish committed on
Commit
a75bb02
·
verified ·
1 Parent(s): b0328a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -6
app.py CHANGED
@@ -97,12 +97,7 @@ model = AutoModelForCausalLM.from_pretrained(
97
  attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
98
  ).to(device)
99
  model.eval()
100
-
101
- # Apply dynamic quantization
102
- model = torch.quantization.quantize_dynamic(
103
- model, {torch.nn.Linear}, dtype=torch.qint8
104
- ).to(device)
105
- model.eval()
106
 
107
  basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
108
  resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
 
97
  attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
98
  ).to(device)
99
  model.eval()
100
+ # gonna use either gguf or vllm later
 
 
 
 
 
101
 
102
  basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
103
  resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'