Spaces:

FunAudioLLM
/

InspireMusic

Runtime error

InspireMusic / inspiremusic /music_tokenizer /vqvae.py

chong.zhang

init

5827423 7 months ago

2.05 kB

	# Copyright (c) 2024 Alibaba Inc
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import json

	import torch
	import torch.nn as nn
	from inspiremusic.music_tokenizer.env import AttrDict
	from inspiremusic.music_tokenizer.models import Encoder
	from inspiremusic.music_tokenizer.models import Generator
	from inspiremusic.music_tokenizer.models import Quantizer


	class VQVAE(nn.Module):
	    """Music tokenizer that bundles a quantizer, a generator and an optional encoder.

	    The sub-modules are built from a JSON hyper-parameter file and populated
	    from the ``'generator'``, ``'quantizer'`` and (optionally) ``'encoder'``
	    entries of a single checkpoint file.
	    """

	    def __init__(self,
	                 config_path,
	                 ckpt_path,
	                 with_encoder=False):
	        """Build the sub-modules and load their weights.

	        Args:
	            config_path: Path to the JSON file holding the hyper-parameters;
	                its content is wrapped in an ``AttrDict`` and passed to every
	                sub-module constructor.
	            ckpt_path: Path to a ``torch.save``-d mapping with the keys
	                ``'generator'`` and ``'quantizer'`` (plus ``'encoder'`` when
	                ``with_encoder`` is true).
	            with_encoder: When true, also build the encoder and load its
	                weights so that :meth:`encode` can be used.
	        """
	        super().__init__()

	        # Deserialize onto the CPU so that a checkpoint written from a CUDA
	        # device can still be opened on a CPU-only host. ``load_state_dict``
	        # copies the values into the freshly built modules, so the resulting
	        # model is the same as before.
	        # NOTE(review): ``torch.load`` unpickles the file -- only open
	        # checkpoints from a trusted source (or pass ``weights_only=True`` if
	        # the installed torch version and the checkpoint content allow it).
	        state = torch.load(ckpt_path, map_location="cpu")

	        with open(config_path, encoding="utf-8") as config_file:
	            self.h = AttrDict(json.load(config_file))

	        self.quantizer = Quantizer(self.h)
	        self.generator = Generator(self.h)
	        self.generator.load_state_dict(state['generator'])
	        self.quantizer.load_state_dict(state['quantizer'])

	        if with_encoder:
	            self.encoder = Encoder(self.h)
	            self.encoder.load_state_dict(state['encoder'])

	    def forward(self, x):
	        """Decode codebook indices with the generator.

	        Args:
	            x: Codebook indices; per the original author's note the expected
	                shape is ``(B, T, Nq)``.

	        Returns:
	            Whatever ``self.generator`` produces for the embeddings returned
	            by ``self.quantizer.embed(x)``.
	        """
	        quant_emb = self.quantizer.embed(x)
	        return self.generator(quant_emb)

	    def encode(self, x):
	        """Turn an input batch into stacked codebook indices.

	        Args:
	            x: Batched input; a trailing singleton axis on a 3-D tensor is
	                dropped, then a new axis is inserted at position 1 before the
	                tensor is handed to the encoder.

	        Returns:
	            The code tensors returned by the quantizer, each reshaped to
	            ``(batch, -1)`` and stacked along a new last axis (the original
	            author's note gives the result shape as ``[N, T, 4]``).

	        Raises:
	            AttributeError: If the model was built without an encoder
	                (``with_encoder=False``).
	        """
	        encoder = getattr(self, "encoder", None)
	        if encoder is None:
	            raise AttributeError(
	                "VQVAE has no encoder; construct it with with_encoder=True "
	                "to use encode()")

	        batch_size = x.size(0)
	        if x.dim() == 3 and x.shape[-1] == 1:
	            x = x.squeeze(-1)

	        features = encoder(x.unsqueeze(1))
	        # The quantizer returns three values; only the third (the codes) is
	        # needed here.
	        _quantized, _loss, code_groups = self.quantizer(features)
	        per_group = [codes.reshape(batch_size, -1) for codes in code_groups]
	        return torch.stack(per_group, -1)