"""Gradio Space: Arabic(?) text-to-speech with a VITS model hosted on the HF Hub."""
import os

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, VitsModel  # VitsModel was used but never imported

# Hub auth token comes from the environment — never hard-code secrets.
token = os.environ.get("key_")  # `os` was used without being imported

tokenizer = AutoTokenizer.from_pretrained("wasmdashai/vtk", token=token)
model = VitsModel.from_pretrained("wasmdashai/vtk", token=token).cuda()

# Sanity check that CUDA placement worked (prints the tensor's device at startup).
zero = torch.Tensor([0]).cuda()
print(zero.device)


@spaces.GPU
def modelspeech(text):
    """Synthesize speech for `text`.

    Args:
        text: Input string to vocalize.

    Returns:
        (sampling_rate, waveform): int sample rate and a flat float numpy
        array — the tuple format `gr.Audio` accepts.
    """
    # BatchEncoding has no .cuda(); move the tensors with .to(device) instead.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # waveform: (1, samples) on GPU -> flat numpy array on CPU
        wav = model(input_ids=inputs["input_ids"]).waveform.cpu().numpy().reshape(-1)
    return model.config.sampling_rate, wav


# Original had a syntax error here: `inputs=["text", outputs=["audio"])`
# (unclosed list). Close the list so the file actually parses.
demo = gr.Interface(fn=modelspeech, inputs=["text"], outputs=["audio"])
demo.launch()