Spaces:
Sleeping
Sleeping
File size: 2,755 Bytes
2ac4330 6a1d4c8 5bacd80 6a1d4c8 593e98b 5bacd80 593e98b 5bacd80 6a1d4c8 5bacd80 2ac4330 5bacd80 2ac4330 5bacd80 2ac4330 6a1d4c8 5bacd80 6a1d4c8 5bacd80 6a1d4c8 5bacd80 6a1d4c8 5bacd80 0781621 5bacd80 593e98b 0781621 593e98b 5bacd80 87569bb e6b630b f83a617 e6b630b 2ac4330 6a1d4c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
from huggingface_hub import InferenceClient
def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """
    Stream chat responses from the Hugging Face Inference API.

    Args:
        message: Latest user message.
        history: Prior turns as (user_msg, assistant_msg) text pairs.
        token: Optional HF API token; blank/None means anonymous access.
        model: Model repo id; required whenever a token is supplied.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated assistant response so far, or an error string.
    """
    # Gradio can hand us None for empty textboxes; normalize to "" before
    # stripping so we don't crash with AttributeError.
    token = (token or "").strip()
    model = (model or "").strip()

    if not token:
        # Anonymous access: fall back to a known public model.
        model = "HuggingFaceH4/zephyr-7b-beta"
        try:
            client = InferenceClient(model=model)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return
    else:
        # A token implies a specific (possibly gated) model, so the name
        # is mandatory.
        if not model:
            yield "Please provide a model name when using an HF token."
            return
        try:
            client = InferenceClient(model=model, token=token)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return

    # Rebuild the conversation as OpenAI-style message dicts.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the completion, yielding the growing response per chunk.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=int(max_tokens),  # gr.Slider may deliver a float
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {str(e)}"
# UI inputs: credentials and model selection.
token_input = gr.Textbox(
    label="HF API Token",
    placeholder="hf_XXXXXXXXXXXX",
    type="password",
)
model_input = gr.Textbox(
    label="Model Name (required with HF token)",
    placeholder="Enter model name when using token",
    visible=True,
)
# Extra configuration controls shown alongside the chat box.
extra_inputs = [
    token_input,
    model_input,
    gr.Textbox(
        value="You are helpful AI chatbot who responds like a mideval knight who is extremely polite, noble and funny.",
        label="System Message",
    ),
    gr.Slider(1, 2048, value=512, label="Max Tokens"),
    gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
]

# Wire the streaming respond() handler into a chat UI.
demo = gr.ChatInterface(
    fn=respond,
    title="HF Text Generation Model Tester",
    description="Add token + model name",
    additional_inputs=extra_inputs,
)

if __name__ == "__main__":
    demo.launch()