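"""Gradio chat app that streams replies from the Hugging Face Inference API.

Users may supply an HF API token to use gated/private models; without a token
the app falls back to a public model.
"""
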
import gradio as gr
from huggingface_hub import InferenceClient

def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """
    Handle chat responses using the Hugging Face Inference API.
    """
    # Handle token and model defaults
    token = token.strip()
    model = model.strip()

    # Default model selection logic
    if not token:
        model = "gpt2"  # Default public model that doesn't require a token
        try:
            client = InferenceClient(model=model)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return
    else:
        model = model or "meta-llama/Llama-3.1-8B-Instruct"  # Default private model
        try:
            client = InferenceClient(model=model, token=token)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return

    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Generate and stream the response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {str(e)}"

# Input components
token_input = gr.Textbox(
    type="password",
    label="HF API Token (leave empty for public models)",
    placeholder="hf_XXXXXXXXXXXX"
)
model_input = gr.Dropdown(
    label="Model Name",
    choices=[
        "gpt2",
        "HuggingFaceH4/zephyr-7b-beta",
        "meta-llama/Llama-3.1-8B-Instruct"
    ],
    value="gpt2"
)

# Chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="HF Model Chat Interface",
    description="Enter a token to use gated/private models, or leave it empty to use public models",
    additional_inputs=[
        token_input,
        model_input,
        gr.Textbox(value="You are a helpful AI.", label="System Message"),
        gr.Slider(1, 2048, value=512, label="Max Tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()