File size: 5,168 Bytes
038f313
77298b9
7de1759
038f313
880ced6
 
e13eb1b
038f313
e13eb1b
038f313
 
 
 
e13eb1b
038f313
 
27c8b8d
 
 
038f313
 
 
3a64d68
98674ca
27c8b8d
 
038f313
e13eb1b
27c8b8d
e13eb1b
27c8b8d
 
 
 
 
7de1759
27c8b8d
f7c4208
27c8b8d
52ad57a
 
038f313
27c8b8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77298b9
 
27c8b8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542c2ac
e13eb1b
f7c4208
27c8b8d
77298b9
7de1759
27c8b8d
 
 
 
e7683ca
8696822
27c8b8d
77298b9
27c8b8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7de1759
27c8b8d
 
 
 
 
10ffb1d
27c8b8d
e7683ca
77298b9
27c8b8d
77298b9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import gradio as gr
from openai import OpenAI
import os

# Read the Hugging Face access token from the HF_TOKEN environment variable.
# The lookup yields None when the variable is unset; the message below is
# printed either way and does not confirm that a token was actually found.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
print("Access token loaded.")

# Build an OpenAI-compatible client that targets the Hugging Face Inference
# API endpoint and authenticates with the token read above.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
print("OpenAI client initialized.")

def _build_messages(message, history, system_message):
    """Assemble the chat-completions ``messages`` list for a single turn.

    The list starts with the system message, continues with every non-empty
    user/assistant entry from ``history`` in order, and ends with the new
    user ``message``.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    messages.append({"role": "user", "content": message})
    return messages


def _resolve_model(custom_model, selected_model):
    """Return the stripped custom model id if non-empty, else the selected one.

    ``None`` is treated like an empty string for either argument, so a cleared
    textbox or a radio group without a selection does not raise.
    """
    custom = (custom_model or "").strip()
    return custom or (selected_model or "").strip()


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
    selected_model
):
    """
    Stream a chat completion for ``message`` and yield the growing reply.

    Args:
        message: The latest user message.
        history: Earlier turns as (user, assistant) pairs; empty parts are
            skipped when building the request.
        system_message: Content of the leading system message.
        max_tokens: Forwarded as ``max_tokens`` to the completion request.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        frequency_penalty: Forwarded as ``frequency_penalty``.
        seed: Forwarded as ``seed``; ``-1`` is converted to ``None`` (random).
        custom_model: Optional model id; overrides ``selected_model`` when it
            is non-empty after stripping whitespace.
        selected_model: Model id used when ``custom_model`` is empty.

    Yields:
        The response text accumulated so far, once per received text token.

    Raises:
        ValueError: If neither ``custom_model`` nor ``selected_model`` names
            a model.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Custom model: {custom_model}")
    print(f"Selected model: {selected_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = _build_messages(message, history, system_message)

    model_to_use = _resolve_model(custom_model, selected_model)
    if not model_to_use:
        # Fail before the network call instead of sending an empty model id.
        raise ValueError(
            "No model specified: enter a custom model or select a featured model."
        )
    print(f"Model selected for inference: {model_to_use}")

    # Start with an empty string to build the response as tokens stream in
    response = ""
    print("Sending request to OpenAI API.")

    # Make the streaming request to the HF Inference API via openai-like client
    stream = client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    )
    for message_chunk in stream:
        # Some chunks carry no choices at all; there is nothing to append.
        if not message_chunk.choices:
            continue

        token_text = message_chunk.choices[0].delta.content
        print(f"Received token: {token_text}")

        # delta.content is None on role-only / final chunks; concatenating it
        # would raise TypeError, so only real text extends the response.
        if not token_text:
            continue

        response += token_text
        yield response

    print("Completed response generation.")

# Featured model identifiers: offered as the choices of the "Featured Models"
# radio group and searched by filter_models(). The original author describes
# them as placeholders.
# NOTE(review): nothing in this file checks that each id exists on the Hub or
# is served by the inference endpoint — confirm before relying on them.
models_list = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "bigscience/bloom-7b1",
    "EleutherAI/gpt-neo-2.7B",
    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
    "HuggingFace/distilgpt2",
]

def filter_models(search_term):
    """Return a component update listing the featured models that match.

    Matching is a case-insensitive substring test against each entry of
    ``models_list``. Surrounding whitespace in ``search_term`` is ignored, and
    ``None`` or an empty term matches every model.

    Args:
        search_term: Text typed into the filter box; may be ``None``.

    Returns:
        The result of ``gr.update(choices=...)`` carrying the matching ids.
    """
    # Normalise once, outside the comprehension; tolerate a None payload.
    needle = (search_term or "").strip().lower()
    filtered_models = [m for m in models_list if needle in m.lower()]
    return gr.update(choices=filtered_models)

# Chat display used by the ChatInterface below.
chatbot = gr.Chatbot(height=600)
print("Chatbot interface created.")

# Assemble the UI.
#
# The previous gr.Interface(...) definition could not be constructed:
# gr.Accordion takes no `children` argument, an event listener (.change) was
# called inline inside the inputs list with strings instead of components,
# and the inputs did not supply the `message` / `history` arguments that
# respond() expects. Layout containers are context managers, and a streaming
# chat function belongs in gr.ChatInterface, so the UI is built with Blocks.
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Featured Models accordion: a search box that narrows the radio choices.
    with gr.Accordion("Featured Models", open=True):
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search for a featured model...",
            lines=1,
        )
        featured_model_radio = gr.Radio(
            label="Select a featured model",
            value="meta-llama/Llama-3.3-70B-Instruct",
            choices=models_list,
            elem_id="model-radio",
        )
        # Re-filter the radio choices whenever the search text changes.
        model_search_box.change(
            fn=filter_models,
            inputs=model_search_box,
            outputs=featured_model_radio,
        )

    # additional_inputs are passed to respond() after (message, history), so
    # their order must match respond's remaining parameters:
    # system_message, max_tokens, temperature, top_p, frequency_penalty,
    # seed, custom_model, selected_model.
    # render=False defers rendering so ChatInterface places these widgets in
    # its own additional-inputs accordion; the radio is already rendered above
    # and is only wired in as an input.
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        additional_inputs=[
            gr.Textbox(value="", label="System message", render=False),
            gr.Slider(
                minimum=1,
                maximum=4096,
                value=512,
                step=1,
                label="Max new tokens",
                render=False,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-P",
                render=False,
            ),
            gr.Slider(
                minimum=-2.0,
                maximum=2.0,
                value=0.0,
                step=0.1,
                label="Frequency Penalty",
                render=False,
            ),
            gr.Slider(
                minimum=-1,
                maximum=65535,  # Arbitrary upper limit for demonstration
                value=-1,
                step=1,
                label="Seed (-1 for random)",
                render=False,
            ),
            gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. This will override the default model if not empty.",
                render=False,
            ),
            featured_model_radio,
        ],
    )

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()