"""Gradio demo app for a Hugging Face Space.

At import time this script logs in to the Hugging Face Hub, loads the
``bigcode/starcoderbase-3b`` tokenizer and the
``ejschwartz/resym-vardecoder`` causal language model, and then serves a
small Gradio interface whose handler runs under ``@spaces.GPU``.
"""

import os

# NOTE(review): `spaces` is kept ahead of `torch`, as in the original import
# order — presumably required by ZeroGPU; confirm before reordering.
import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import huggingface_hub

# Fail fast with a KeyError if the token is not configured for this process.
hf_key = os.environ["HF_TOKEN"]
huggingface_hub.login(token=hf_key)

# `token=` replaces the deprecated `use_auth_token=` keyword.
tokenizer = AutoTokenizer.from_pretrained(
    "bigcode/starcoderbase-3b",
    token=hf_key,
)
# NOTE(review): `tokenizer` and `vardecoder_model` are loaded here but are
# not referenced by `greet` below.
vardecoder_model = AutoModelForCausalLM.from_pretrained(
    "ejschwartz/resym-vardecoder",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔


@spaces.GPU
def greet(n) -> str:
    """Return a greeting string that embeds ``zero + n``.

    Args:
        n: The value supplied by the ``gr.Number`` input of the interface.

    Returns:
        The text ``"Hello <zero + n> Tensor"``.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch()