Update app.py
app.py CHANGED
@@ -73,18 +73,15 @@ from models.soundstream_hubert_new import SoundStream
 device = "cuda:0"
 
 model = AutoModelForCausalLM.from_pretrained(
-    "m-a-p/YuE-s1-7B-anneal-en-cot",
+    "m-a-p/YuE-s1-7B-anneal-en-icl", # "m-a-p/YuE-s1-7B-anneal-en-cot",
     torch_dtype=torch.float16,
     attn_implementation="flash_attention_2",
-
+    low_cpu_mem_usage=True,
 ).to(device)
 model.eval()
 
 basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
 resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
-#config_path = './xcodec_mini_infer/decoders/config.yaml' # removed vocoder
-#vocal_decoder_path = './xcodec_mini_infer/decoders/decoder_131000.pth' # removed vocoder
-#inst_decoder_path = './xcodec_mini_infer/decoders/decoder_151000.pth' # removed vocoder
 
 mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
 
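This hunk swaps the stage-1 checkpoint from the chain-of-thought model (anneal-en-cot) to the in-context-learning model (anneal-en-icl) and adds low_cpu_mem_usage=True, which lets transformers build the model skeleton first and stream checkpoint shards into place instead of materializing a full copy of the weights in host RAM. A short annotated sketch of the resulting loading path, using only the ids and flags visible in the diff:

    import torch
    from transformers import AutoModelForCausalLM

    device = "cuda:0"
    model = AutoModelForCausalLM.from_pretrained(
        "m-a-p/YuE-s1-7B-anneal-en-icl",          # was: m-a-p/YuE-s1-7B-anneal-en-cot
        torch_dtype=torch.float16,                # fp16 weights halve GPU memory for the 7B model
        attn_implementation="flash_attention_2",  # requires the flash-attn package
        low_cpu_mem_usage=True,                   # load shards directly, no full CPU copy first
    ).to(device)
    model.eval()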
@@ -94,18 +91,8 @@ model_config = OmegaConf.load(basic_model_config)
 codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
 parameter_dict = torch.load(resume_path, map_location='cpu')
 codec_model.load_state_dict(parameter_dict['codec_model'])
-# codec_model = torch.compile(codec_model)
 codec_model.eval()
 
-# Preload and compile vocoders # removed vocoder
-#vocal_decoder, inst_decoder = build_codec_model(config_path, vocal_decoder_path, inst_decoder_path)
-#vocal_decoder.to(device)
-#inst_decoder.to(device)
-#vocal_decoder = torch.compile(vocal_decoder)
-#inst_decoder = torch.compile(inst_decoder)
-#vocal_decoder.eval()
-#inst_decoder.eval()
-
 
 @spaces.GPU(duration=120)
 def generate_music(
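Everything deleted in this hunk was already commented out (the optional torch.compile call and the vocoder preloading), so the change is pure cleanup. The instantiation that survives calls eval() on a class name read from the OmegaConf config; below is a sketch of an eval-free equivalent that uses an explicit registry, assuming generator.name resolves to the SoundStream class imported in the hunk header:

    import torch
    from models.soundstream_hubert_new import SoundStream

    # Explicit registry instead of eval(); extend if the config names other classes.
    GENERATORS = {"SoundStream": SoundStream}

    codec_model = GENERATORS[model_config.generator.name](
        **model_config.generator.config
    ).to(device)
    parameter_dict = torch.load(resume_path, map_location='cpu')
    codec_model.load_state_dict(parameter_dict['codec_model'])
    codec_model.eval()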
@@ -309,8 +296,8 @@ def generate_music(
                 continue
             # mix
             recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('instrumental', 'mixed'))
-            vocal_stem, sr = sf.read(
-            instrumental_stem, _ = sf.read(
+            vocal_stem, sr = sf.read(vocal_path)
+            instrumental_stem, _ = sf.read(inst_path)
             mix_stem = (vocal_stem + instrumental_stem) / 1
             return (sr, (mix_stem * 32767).astype(np.int16)), (sr, (vocal_stem * 32767).astype(np.int16)), (sr, (instrumental_stem * 32767).astype(np.int16))
         except Exception as e:
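The removed lines were dangling calls (sf.read( with the argument never closed); the fix reads the vocal and instrumental stems back from disk and returns the mix to Gradio as (sample_rate, int16 array) tuples. Note that dividing the sum by 1 does not attenuate it, so the mix can leave [-1, 1] and overflow when scaled by 32767. The sketch below adds a clipping guard, which is an addition and not part of the diff:

    import numpy as np
    import soundfile as sf

    vocal_stem, sr = sf.read(vocal_path)        # float samples in [-1, 1]
    instrumental_stem, _ = sf.read(inst_path)   # stems share one sample rate

    mix_stem = vocal_stem + instrumental_stem   # summed stems can exceed [-1, 1]
    mix_stem = np.clip(mix_stem, -1.0, 1.0)     # guard added here; not in the diff
    mix_int16 = (mix_stem * 32767).astype(np.int16)  # Gradio expects (sr, int16 array)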