KingNish committed on
Commit
0d14459
·
verified ·
1 Parent(s): f91901a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -63,7 +63,7 @@ from tqdm import tqdm
63
  from einops import rearrange
64
  from codecmanipulator import CodecManipulator
65
  from mmtokenizer import _MMSentencePieceTokenizer
66
- from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList, BitsAndBytesConfig
67
  import glob
68
  import time
69
  import copy
@@ -74,16 +74,14 @@ from post_process_audio import replace_low_freq_with_energy_matched
74
 
75
  device = "cuda:0"
76
 
77
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
78
-
79
  model = AutoModelForCausalLM.from_pretrained(
80
  "m-a-p/YuE-s1-7B-anneal-en-cot",
81
- torch_dtype="auto",
82
- # attn_implementation="flash_attention_2",
83
  quantization_config=quantization_config,
84
  low_cpu_mem_usage=True,
85
- device_map="auto"
86
- )
87
  model.eval()
88
 
89
  basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
 
63
  from einops import rearrange
64
  from codecmanipulator import CodecManipulator
65
  from mmtokenizer import _MMSentencePieceTokenizer
66
+ from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
67
  import glob
68
  import time
69
  import copy
 
74
 
75
  device = "cuda:0"
76
 
 
 
77
  model = AutoModelForCausalLM.from_pretrained(
78
  "m-a-p/YuE-s1-7B-anneal-en-cot",
79
+ torch_dtype=torch.bfloat16,
80
+ attn_implementation="flash_attention_2",
81
  quantization_config=quantization_config,
82
  low_cpu_mem_usage=True,
83
+ # device_map="auto"
84
+ ).to(device)
85
  model.eval()
86
 
87
  basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'