import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.utils import HfHubHTTPError
import os
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
**Note:** You might need to authenticate with Hugging Face for this to work reliably.
Run `huggingface-cli login` in your terminal or set the HUGGING_FACE_HUB_TOKEN environment variable.
Alternatively, pass your token directly: InferenceClient(token="hf_YOUR_TOKEN")
"""
# Initialize the Inference Client.
# It will use the HUGGING_FACE_HUB_TOKEN environment variable or a cached login if present.
try:
    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    # Optionally, fall back to an explicit token if one is available:
    # token = os.getenv("HUGGING_FACE_HUB_TOKEN")
    # if token:
    #     client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)
    # else:
    #     raise ValueError(
    #         "Could not initialize InferenceClient. Ensure you are logged in or provide a token."
    #     ) from e
    # For now, re-raise if initialization fails fundamentally.
    raise
def respond(
    message: str,
    history: list[tuple[str | None, str | None]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Generates a response using the Hugging Face Inference API.

    Args:
        message: The user's input message.
        history: A list of (user_message, bot_message) tuples representing
            the conversation history.
        system_message: The system prompt used to guide the model.
        max_tokens: The maximum number of new tokens to generate.
        temperature: Controls randomness (higher = more random).
        top_p: Nucleus sampling parameter.

    Yields:
        The generated response, accumulated incrementally.
    """
    messages = [{"role": "system", "content": system_message}]

    # Add the conversation history.
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    # Add the latest user message.
    messages.append({"role": "user", "content": message})
response = ""
try:
# Start streaming the response
for msg_chunk in client.chat_completion(
messages=messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
# Check if there's content in the delta
token = msg_chunk.choices[0].delta.content
if token: # Add check for empty/None token
response += token
yield response # Yield the accumulated response so far
    except HfHubHTTPError as e:
        print(f"Inference API Error: {e}")
        yield f"Sorry, I encountered an error: {e}"
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        yield f"Sorry, an unexpected error occurred: {e}"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=400),  # Adjust chatbot height if desired.
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="Zephyr 7B Beta Chat",
    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API.",
    theme="soft",  # Optional: apply a theme.
    examples=[
        ["Hello!"],
        ["Explain the concept of Large Language Models in simple terms."],
        ["Write a short poem about the moon."],
    ],
    cache_examples=False,  # Set to True to cache example results.
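    # Note: the three button-label arguments below exist in Gradio 4.x;
    # Gradio 5 removed them from ChatInterface.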
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(value="You are a friendly and helpful chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        # Note: the API often allows higher temperatures (e.g., up to 2.0); this UI caps at 1.0.
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    additional_inputs_accordion=gr.Accordion(label="Advanced Options", open=False),  # Group additional inputs.
)
if __name__ == "__main__":
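    # For a public share link (e.g., when running in a notebook), one could
    # call demo.launch(share=True) instead; the default below launches locally.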
    demo.launch()