"""Stream a chat completion from Phi-4 on a Hugging Face `spaces` GPU worker.

The script loads the model once at import time, then generates a reply to a
fixed demo conversation and prints it to stdout as the tokens arrive.
"""

import os

# Set before torch is imported so the CUDA caching allocator is guaranteed to
# see the option when it initialises.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from threading import Thread  # noqa: E402

import spaces  # noqa: E402
import torch  # noqa: E402
from transformers import (  # noqa: E402
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

MODEL_ID = "NyxKrage/Microsoft_Phi-4"

torch.random.manual_seed(0)

# NOTE(review): trust_remote_code=True executes Python shipped in the model
# repository; keep it only while that repository is trusted.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {
        "role": "user",
        "content": "Can you provide ways to eat combinations of bananas and dragonfruits?",
    },
    {
        "role": "assistant",
        "content": (
            "Sure! Here are some ways to eat bananas and dragonfruits together: "
            "1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits "
            "together with some milk and honey. "
            "2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits "
            "together with some lemon juice and honey."
        ),
    },
    {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
]

# The pipeline applies the tokenizer's chat template to `messages`, which a
# bare `model.generate` call does not do.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding. `temperature` is intentionally omitted: it is ignored when
# do_sample is False and only produces a configuration warning.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}


@spaces.GPU
def tuili():
    """Run inference on the demo conversation and yield the reply in chunks.

    Generation runs on a background thread while this generator drains the
    streamer, so text is yielded as soon as it is decoded. The streamer is
    created and consumed inside the decorated function so that producer and
    consumer always share one process (on ZeroGPU the decorated call may run
    in a separate worker - TODO confirm for the deployed runtime).

    Yields:
        str: Successive pieces of the assistant reply, without the prompt and
        without special tokens.

    Raises:
        Exception: Whatever the generation thread raised, re-raised here once
        the stream has been drained.
    """
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,  # mirror return_full_text=False in the stream
        skip_special_tokens=True,
    )
    errors = []

    def _generate():
        try:
            pipe(messages, streamer=streamer, **generation_args)
        except Exception as exc:  # thread boundary: forwarded to the consumer
            errors.append(exc)
            # Unblock the consumer; otherwise it would wait on the queue forever.
            streamer.end()

    worker = Thread(target=_generate, daemon=True)
    worker.start()
    yield from streamer
    worker.join()
    if errors:
        raise errors[0]


def main():
    """Print the streamed reply to stdout as one continuous text."""
    for new_text in tuili():
        # Chunks are fragments of one reply: no newline between them.
        print(new_text, end="", flush=True)
    print()


if __name__ == "__main__":
    main()