Spaces:
Running
Running
File size: 1,188 Bytes
d3d1e52 69a8ba9 d3d1e52 69a8ba9 2d66e4d d3d1e52 471735c d3d1e52 3706199 d3d1e52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
from huggingface_hub import InferenceClient
import gradio as gr
# Module-level inference client, created once at import time with default
# settings; `respond` uses it for every chat completion request.
client = InferenceClient()
def respond(
    prompt: str,
    history,
):
    """Stream a chat reply to ``prompt`` as a growing list of messages.

    This is a generator. It first yields the conversation with the new user
    message appended, then, as the model's reply streams in, repeatedly
    yields the conversation followed by the partially built assistant
    message.

    Args:
        prompt: Text of the new user message.
        history: Earlier conversation as a list of message dicts with
            ``"role"`` and ``"content"`` keys (the same shape this function
            yields). When it is empty or ``None`` the conversation starts
            with a system message. The passed list is never modified.

    Yields:
        A list of message dicts representing the conversation so far.
    """
    # Work on a copy so the caller's list is left untouched; previously a
    # non-empty history was appended to in place while an empty one was not.
    if history:
        messages = list(history)
    else:
        messages = [{"role": "system", "content": "You are a friendly chatbot"}]
    messages.append({"role": "user", "content": prompt})

    # Emit the conversation including the user's message before the model
    # is contacted.
    yield messages

    response = {"role": "assistant", "content": ""}
    for message in client.chat_completion(
        messages,
        temperature=0.95,
        top_p=0.9,
        max_tokens=512,
        stream=True,
        model="HuggingFaceH4/zephyr-7b-beta",
    ):
        # A chunk without any choices carries no text; skip it instead of
        # failing with IndexError on choices[0].
        if not message.choices:
            continue
        # delta.content may be None, hence the fallback to "".
        response["content"] += message.choices[0].delta.content or ""
        yield messages + [response]
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗")

    # Conversation view working on role/content message dicts. The first
    # avatar slot has no image; the second uses the hugging-face emoji.
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png",
        ),
    )

    # Single-line input box for the user's message.
    prompt = gr.Textbox(label="Chat Message", max_lines=1)

    # Two listeners are registered on submit: one passes the prompt and the
    # current conversation to `respond` and writes its output to the chatbot,
    # the other writes an empty string back into the textbox.
    prompt.submit(fn=respond, inputs=[prompt, chatbot], outputs=[chatbot])
    prompt.submit(fn=lambda: "", inputs=None, outputs=[prompt])
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()