from huggingface_hub import InferenceClient
import gradio as gr

# For more information on `huggingface_hub` Inference API support, please check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    prompt: str,
    history,
):
    """Stream the model's reply to ``prompt`` into the chat history.

    Args:
        prompt: The text the user just submitted.
        history: Prior conversation as a sequence of ``[user, assistant]``
            pairs (the shape this function itself appends and yields).
            ``None`` or an empty value is treated as "no prior turns".

    Yields:
        The updated list of ``[user, assistant]`` pairs: once as soon as the
        new user turn has been appended (assistant slot still ``None``), and
        then again after every streamed chunk with the partial reply filled in.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate a
    # missing history on the very first turn.
    history = list(history or [])

    # Rebuild the role/content message list expected by `chat_completion`
    # from the pair-based history, skipping empty slots.
    messages = [{"role": "system", "content": "You are a friendly chatbot"}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": prompt})

    # Show the user's message immediately. Only the pair-based `history` is
    # ever yielded: `messages` is the request payload, not chatbot output.
    history.append([prompt, None])
    yield history

    response = ""
    for message in client.chat_completion(
        messages,
        stream=True,
    ):
        # A streamed delta may carry no content; treat that as an empty chunk.
        response += message.choices[0].delta.content or ""
        history[-1][1] = response
        yield history


with gr.Blocks() as demo:
    gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗")
    chatbot = gr.Chatbot(
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png",
        ),
    )
    prompt = gr.Textbox(lines=1, label="Chat Message")
    # Submitting the textbox streams `respond`'s yields into the chatbot.
    prompt.submit(respond, [prompt, chatbot], [chatbot])


if __name__ == "__main__":
    demo.launch()