"""Stream a chat completion from the Hugging Face Inference API.

Requires the ``HF_TOKEN`` environment variable to hold a valid
Hugging Face access token. Prints the generated story to stdout as
tokens arrive.
"""

import os

from huggingface_hub import InferenceClient


def main() -> None:
    """Send one chat request and stream the reply to stdout."""
    # Fail fast with a clear message instead of an opaque 401 later.
    api_key = os.getenv("HF_TOKEN")
    if not api_key:
        raise SystemExit("HF_TOKEN environment variable is not set")

    client = InferenceClient(api_key=api_key)

    messages = [
        {"role": "user", "content": "Tell me a story"},
    ]

    stream = client.chat.completions.create(
        model="HuggingFaceH4/zephyr-7b-beta",
        messages=messages,
        temperature=0.5,
        max_tokens=2048,
        top_p=0.7,
        stream=True,
    )

    for chunk in stream:
        # delta.content is None on the final chunk; skip it rather than
        # printing the literal string "None".
        content = chunk.choices[0].delta.content
        if content:
            # end="" keeps streamed tokens on one continuous line instead
            # of inserting a newline after every fragment.
            print(content, end="", flush=True)
    print()  # final newline after the complete story


if __name__ == "__main__":
    main()