import gradio as gr
from huggingface_hub import InferenceClient


def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """
    Handle chat responses using the Hugging Face Inference API.
    """
    # Normalize the optional token and model inputs
    token = token.strip()
    model = model.strip()

    if not token:
        # No token: fall back to the default public model when none is specified
        if not model:
            model = "HuggingFaceH4/zephyr-7b-beta"
        try:
            client = InferenceClient(model=model)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return
    else:
        # A model name is required when a token is provided
        if not model:
            yield "Please provide a model name when using an HF token."
            return
        try:
            client = InferenceClient(model=model, token=token)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return

    # Rebuild the conversation as a list of role/content messages
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the response, yielding the accumulated text after each chunk
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {str(e)}"

# Input components
token_input = gr.Textbox(
    type="password",
    label="HF API Token (leave empty for public model)",
    placeholder="hf_XXXXXXXXXXXX",
)
model_input = gr.Textbox(
    label="Model Name (required if using token)",
    placeholder="Enter model name when using token",
    visible=True,
)

# Chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="HF Text Generation Model Tester",
    description="Add token + model name for private models",
    additional_inputs=[
        token_input,
        model_input,
        gr.Textbox(
            value=(
                "You are a helpful AI chatbot who responds like a medieval "
                "knight who is extremely polite, noble, and funny."
            ),
            label="System Message",
        ),
        gr.Slider(1, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()
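
# Note: demo.launch() serves the app locally, and is also the entry point
# Hugging Face Spaces invokes. Passing share=True would additionally create
# a temporary public Gradio link; whether that is wanted depends on the
# deployment, so it is left at the default here.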