Update app.py
Browse files
app.py
CHANGED
@@ -63,7 +63,7 @@ from tqdm import tqdm
|
|
63 |
from einops import rearrange
|
64 |
from codecmanipulator import CodecManipulator
|
65 |
from mmtokenizer import _MMSentencePieceTokenizer
|
66 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
|
67 |
import glob
|
68 |
import time
|
69 |
import copy
|
@@ -74,16 +74,14 @@ from post_process_audio import replace_low_freq_with_energy_matched
|
|
74 |
|
75 |
device = "cuda:0"
|
76 |
|
77 |
-
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
78 |
-
|
79 |
model = AutoModelForCausalLM.from_pretrained(
|
80 |
"m-a-p/YuE-s1-7B-anneal-en-cot",
|
81 |
-
torch_dtype=
|
82 |
-
|
83 |
quantization_config=quantization_config,
|
84 |
low_cpu_mem_usage=True,
|
85 |
-
device_map="auto"
|
86 |
-
)
|
87 |
model.eval()
|
88 |
|
89 |
basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
|
|
|
# --- app.py (new revision, reconstructed from the diff rendering) ---
# Model-loading preamble: imports, target device, and the stage-1 YuE model.

from einops import rearrange
from codecmanipulator import CodecManipulator
from mmtokenizer import _MMSentencePieceTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
import glob
import time
import copy
import torch  # needed for torch.bfloat16 below; not visible in the diffed import lines — TODO confirm it isn't already imported earlier in app.py

# NOTE(review): the diff omits original lines 70-73 (unchanged context); they are
# not reproduced here.

# Single-GPU target; the commit comments out device_map="auto" in favor of an
# explicit .to(device) move.
device = "cuda:0"

# Load YuE stage-1 in bfloat16 with FlashAttention-2 (FA2 requires fp16/bf16).
#
# BUG FIX: the commit deleted the line
#     quantization_config = BitsAndBytesConfig(load_in_8bit=True)
# but left `quantization_config=quantization_config` in this call, which raises
# NameError at import time. Even when defined, an 8-bit bitsandbytes model
# cannot be moved with .to(device) and conflicts with torch_dtype=bfloat16.
# The kwarg is removed to match the commit's evident intent: un-quantized bf16.
model = AutoModelForCausalLM.from_pretrained(
    "m-a-p/YuE-s1-7B-anneal-en-cot",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    low_cpu_mem_usage=True,   # stream weights to reduce peak host RAM during load
    # device_map="auto"       # disabled: explicit single-device placement below
).to(device)
model.eval()  # inference only — disable dropout etc.

# Path to the xcodec vocoder config used later in the pipeline.
basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
|