import gradio as gr
from huggingface_hub import InferenceClient

def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """
    Handle chat responses using the Hugging Face Inference API.
    """
    # Normalize inputs (guard against None in case a textbox is cleared)
    token = (token or "").strip()
    model = (model or "").strip()
    
    # Model selection: with no token, fall back to the default public model
    # (gated or private models would need authentication anyway)
    if not token:
        model = "HuggingFaceH4/zephyr-7b-beta"
        try:
            client = InferenceClient(model=model)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return
    else:
        # Require model name when token is provided
        if not model:
            yield "Please provide a model name when using an HF token."
            return
        try:
            client = InferenceClient(model=model, token=token)
        except Exception as e:
            yield f"Error initializing client: {str(e)}"
            return

    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Generate response
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=int(max_tokens),  # sliders may yield floats; the API expects an int
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
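            # Each streamed chunk carries an incremental delta; skip
            # keep-alive chunks that contain no new text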
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {str(e)}"

# Input components
token_input = gr.Textbox(
    type="password", 
    label="HF API Token",
    placeholder="hf_XXXXXXXXXXXX"
)
model_input = gr.Textbox(
    label="Model Name (required with HF token)",
    placeholder="Enter model name when using token"
)

# Chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="HF Text Generation Model Tester",
    description="Add token + model name",
    additional_inputs=[
        token_input,
        model_input,
        gr.Textbox(value="You are helpful AI chatbot who responds like a mideval knight who is extremely polite, noble and funny.", label="System Message"),
        gr.Slider(1, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()
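
# To run locally: `python app.py`, then open the URL Gradio prints
# (http://127.0.0.1:7860 by default). Passing share=True to demo.launch()
# creates a temporary public share link.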