"""Gradio demo app: log in to the Hugging Face Hub, load models, serve `greet`.

At import time this script:
  1. reads the Hub access token from the ``HF_TOKEN`` environment variable
     and logs in with it,
  2. loads the ``bigcode/starcoderbase-3b`` tokenizer and the
     ``ejschwartz/resym-vardecoder`` causal language model,
  3. creates a small tensor via ``.cuda()`` and prints its device,
  4. builds and launches a Gradio interface around :func:`greet`.
"""

import os

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fails fast with KeyError when the token is not configured; the login and
# the tokenizer download below both need it.
hf_key = os.environ['HF_TOKEN']
login(token=hf_key)

# NOTE(review): newer transformers releases prefer `token=` over
# `use_auth_token=` -- confirm against the pinned version before switching.
tokenizer = AutoTokenizer.from_pretrained(
    'bigcode/starcoderbase-3b',
    use_auth_token=hf_key,
)

vardecoder_model = AutoModelForCausalLM.from_pretrained(
    "ejschwartz/resym-vardecoder",
    torch_dtype=torch.bfloat16,
    device_map='auto',
)

zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔


@spaces.GPU
def greet(n):
    """Return a greeting string that embeds ``zero + n``.

    Args:
        n: Number supplied by the Gradio ``Number`` input.

    Returns:
        The string ``"Hello {zero + n} Tensor"`` with the sum formatted in.
    """
    # Printed for comparison with the module-level device print above.
    print(zero.device)  # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch()