KingNish committed
Commit c874206 (verified) · 1 Parent(s): 018f313

Update app.py

Files changed (1):
  1. app.py +4 -17
app.py CHANGED
@@ -73,18 +73,15 @@ from models.soundstream_hubert_new import SoundStream
 device = "cuda:0"
 
 model = AutoModelForCausalLM.from_pretrained(
-    "m-a-p/YuE-s1-7B-anneal-en-cot",
+    "m-a-p/YuE-s1-7B-anneal-en-icl", # "m-a-p/YuE-s1-7B-anneal-en-cot",
     torch_dtype=torch.float16,
     attn_implementation="flash_attention_2",
-    # low_cpu_mem_usage=True,
+    low_cpu_mem_usage=True,
 ).to(device)
 model.eval()
 
 basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
 resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
-#config_path = './xcodec_mini_infer/decoders/config.yaml' # removed vocoder
-#vocal_decoder_path = './xcodec_mini_infer/decoders/decoder_131000.pth' # removed vocoder
-#inst_decoder_path = './xcodec_mini_infer/decoders/decoder_151000.pth' # removed vocoder
 
 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
 
@@ -94,18 +91,8 @@ model_config = OmegaConf.load(basic_model_config)
 codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
 parameter_dict = torch.load(resume_path, map_location='cpu')
 codec_model.load_state_dict(parameter_dict['codec_model'])
-# codec_model = torch.compile(codec_model)
 codec_model.eval()
 
-# Preload and compile vocoders # removed vocoder
-#vocal_decoder, inst_decoder = build_codec_model(config_path, vocal_decoder_path, inst_decoder_path)
-#vocal_decoder.to(device)
-#inst_decoder.to(device)
-#vocal_decoder = torch.compile(vocal_decoder)
-#inst_decoder = torch.compile(inst_decoder)
-#vocal_decoder.eval()
-#inst_decoder.eval()
-
 
 @spaces.GPU(duration=120)
 def generate_music(
@@ -309,8 +296,8 @@ def generate_music(
             continue
         # mix
        recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('instrumental', 'mixed'))
-       vocal_stem, sr = sf.read(inst_path)
-       instrumental_stem, _ = sf.read(vocal_path)
+       vocal_stem, sr = sf.read(vocal_path)
+       instrumental_stem, _ = sf.read(inst_path)
        mix_stem = (vocal_stem + instrumental_stem) / 1
        return (sr, (mix_stem * 32767).astype(np.int16)), (sr, (vocal_stem * 32767).astype(np.int16)), (sr, (instrumental_stem * 32767).astype(np.int16))
    except Exception as e:
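On the model-loading hunk: the checkpoint moves from the chain-of-thought variant (m-a-p/YuE-s1-7B-anneal-en-cot) to the in-context-learning variant (m-a-p/YuE-s1-7B-anneal-en-icl), and low_cpu_mem_usage=True is enabled. With that flag, transformers builds the model skeleton without materializing randomly initialized weights and streams the checkpoint straight in, so peak host RAM stays near one copy of the fp16 weights rather than two. A minimal, self-contained sketch of the loading path as it stands after this commit (flash_attention_2 assumes a supported GPU with the flash-attn package installed):

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "m-a-p/YuE-s1-7B-anneal-en-icl",
        torch_dtype=torch.float16,
        attn_implementation="flash_attention_2",  # requires flash-attn
        # Skip the random-init pass and load weights in place:
        # roughly 1x instead of 2x peak host RAM for a 7B fp16 model.
        low_cpu_mem_usage=True,
    ).to("cuda:0")
    model.eval()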
 
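Review note on the last hunk: before this commit, vocal_stem was read from inst_path and instrumental_stem from vocal_path, so the two returned stems were swapped in the output (the mix itself was unaffected, since addition commutes). A minimal sketch of the corrected step; the file paths below are hypothetical, while sf.read and the int16 cast mirror the code in the diff:

    import numpy as np
    import soundfile as sf

    vocal_path = "output/vocal_000.wav"         # hypothetical example paths
    inst_path = "output/instrumental_000.wav"

    vocal_stem, sr = sf.read(vocal_path)        # fixed: vocals from vocal_path
    instrumental_stem, _ = sf.read(inst_path)   # fixed: instrumental from inst_path

    mix_stem = vocal_stem + instrumental_stem   # the "/ 1" in the diff is a no-op
    # Summing two stems can push samples outside [-1, 1]; clipping before the
    # int16 cast avoids integer wraparound (the diff casts without a guard).
    mix_int16 = (np.clip(mix_stem, -1.0, 1.0) * 32767).astype(np.int16)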