from diffusers import AutoencoderOobleck
import torch
from model import Voxify
from huggingface_hub import snapshot_download
from safetensors.torch import load_file
import json


class VoxifyInference:
    def __init__(self, name="declare-lab/TangoFlux"):
        # VAE from Stable Audio Open, used to decode audio latents back into waveforms.
        self.vae = AutoencoderOobleck.from_pretrained(
            "stabilityai/stable-audio-open-1.0", subfolder="vae"
        )

        # Download the model repo, then load weights and config.
        path = snapshot_download(repo_id=name)
        weights = load_file("{}/tangoflux.safetensors".format(path))
        with open("{}/config.json".format(path), "r") as f:
            config = json.load(f)

        self.model = Voxify(config)
        self.model.load_state_dict(weights, strict=False)

    def generate(self, prompt, steps=25, duration=10, guidance_scale=4.5):
        # Run flow-matching inference to get an audio latent, then decode it with the VAE.
        with torch.no_grad():
            latent = self.model.inference_flow(
                prompt,
                duration=duration,
                num_inference_steps=steps,
                guidance_scale=guidance_scale,
            )
            wave = self.vae.decode(latent.transpose(2, 1)).sample.cpu()[0]
        return wave
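

# Usage sketch (not part of the original class above): instantiate the wrapper and turn a
# text prompt into a waveform. The torchaudio dependency, the output path, and the 44100 Hz
# sample rate are assumptions; 44.1 kHz matches the Stable Audio Open VAE, but verify against
# self.vae.config (or the repo's config.json) before saving.
if __name__ == "__main__":
    import torchaudio

    voxify = VoxifyInference(name="declare-lab/TangoFlux")
    wave = voxify.generate(
        "Rain falling on a tin roof",
        steps=25,
        duration=10,
        guidance_scale=4.5,
    )
    # wave has shape (channels, samples); torchaudio.save expects a 2D CPU tensor.
    torchaudio.save("output.wav", wave, sample_rate=44100)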