import gradio as gr
from huggingface_hub import InferenceClient
import os
# --- Installation Note ---
# Ensure you have the necessary libraries installed:
# pip install gradio huggingface_hub
# --- Hugging Face Hub Token ---
# The InferenceClient might require a Hugging Face Hub token for certain models or usage.
# Set it as an environment variable HUGGING_FACE_HUB_TOKEN, or log in via `huggingface-cli login`.
# If the model is public and doesn't require login, this might work without a token.
# HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN") # Optional: explicitly get token if needed
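#
# A minimal sketch of authenticating explicitly (assumes the environment
# variable HUGGING_FACE_HUB_TOKEN is set; `login()` is the programmatic
# equivalent of `huggingface-cli login`). Left commented out so the app
# still starts without a token:
#
#     from huggingface_hub import login
#     login(token=os.getenv("HUGGING_FACE_HUB_TOKEN"))
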
client = None
try:
    client = InferenceClient(
        "HuggingFaceH4/zephyr-7b-beta",
        # token=HUGGING_FACE_HUB_TOKEN  # Uncomment to pass the token explicitly
    )
    print("InferenceClient initialized successfully.")
except Exception as e:
print(f"Error initializing InferenceClient: {e}")
print("Please ensure the model identifier is correct and you have necessary permissions/token.")
# You might want to exit or raise the error depending on your application structure
# For this Gradio app, we'll let the respond function handle the missing client.
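
# Optional sanity check: a one-off, non-streaming call to confirm the client
# works before serving traffic. This is a sketch and makes a real API call,
# so it is left commented out:
#
#     if client is not None:
#         out = client.chat_completion(
#             messages=[{"role": "user", "content": "ping"}],
#             max_tokens=8,
#         )
#         print(out.choices[0].message.content)

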
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str = "You are a friendly and helpful chatbot.",  # Default matches the UI textbox below
    max_tokens: int = 512,  # Default matches the UI
    temperature: float = 0.7,  # Default matches the UI
    top_p: float = 0.95,  # Default matches the UI
):
"""
Chat response function for the Gradio interface.
"""
    # --- Client Check ---
    if client is None:
        yield "Error: InferenceClient could not be initialized. Please check the server logs."
        return  # Stop generation if the client is not available

    # --- Input Validation (Basic) ---
    if not message:
        yield "Error: Please enter a message."
        return
    if not system_message:
        system_message = "You are a helpful assistant."  # Fallback system message

    # Build the conversation in the chat-completion message format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
response_text = ""
try:
# Stream the response
for message_chunk in client.chat_completion(
messages=messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
# Check if delta and content exist and are not None
token = message_chunk.choices[0].delta.content
# --- Robust Token Handling ---
if token is not None:
response_text += token
yield response_text # Yield the accumulated response incrementally
except Exception as e:
print(f"Error during API call: {e}")
# Yield a user-friendly error message
yield f"An error occurred while generating the response: {e}"
# --- Gradio Interface Definition ---
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(
        height=500,
        label="Zephyr 7B Beta",
        show_label=True,
        bubble_full_width=False,  # Optional: keep message bubbles compact
    ),
    title="🤖 Zephyr 7B Beta Chat",
    description=(
        "Chat with the Zephyr 7B Beta model using the Hugging Face Inference API.\n"
        "Enter your message and adjust the settings below."
    ),
    examples=[
        ["Hello, how are you today?"],
        ["What is the capital of France?"],
        ["Explain the concept of large language models in simple terms."],
        ["Write a short poem about the rain."],
    ],
    cache_examples=False,  # Set to True to cache example results if desired
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful chatbot.",  # Default system message
            label="System Message",
            info="The instruction given to the chatbot to guide its behavior.",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,  # Default max tokens
            step=1,
            label="Max New Tokens",
            info="Maximum number of tokens to generate.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,  # Capped at 1.0: higher values often degrade output quality
            value=0.7,  # Default temperature
            step=0.1,
            label="Temperature",
            info="Controls randomness. Lower values make output more focused, higher values more diverse.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,  # Default top-p
            step=0.05,
            label="Top-p (nucleus sampling)",
            info="Considers only the most probable tokens with cumulative probability p. Helps filter out low-probability tokens.",
        ),
    ],
    # Group the settings in an accordion. Note: newer Gradio releases rename
    # this parameter to `additional_inputs_accordion`.
    additional_inputs_accordion_name="⚙️ Advanced Settings",
)
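
# Streaming from a generator relies on Gradio's queue. Recent Gradio versions
# enable it by default; on older 3.x versions you may need to turn it on
# explicitly before launching:
#
#     demo.queue()
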
if __name__ == "__main__":
    # Launch the Gradio app.
    demo.launch(
        # share=True,  # Uncomment to create a temporary public link (use with caution)
        # server_name="0.0.0.0",  # Uncomment to allow access from your local network
        # auth=("user", "password"),  # Optional: add basic authentication
    )