import os

import gradio as gr
from openai import OpenAI

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Initialize the OpenAI client against the Hugging Face Inference API endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    model_selection,
    custom_model,
):
    """Stream the chatbot's reply, yielding the partial response as it grows."""
    # A non-empty custom model overrides the featured-model selection
    selected_model = custom_model.strip() if custom_model.strip() else model_selection
    print(f"Selected model: {selected_model}")

    # A seed of -1 means "random", so omit it from the request
    if seed == -1:
        seed = None

    # Convert the Gradio history into the OpenAI chat-message format
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Stream tokens back to the UI as they arrive
    response = ""
    for message_chunk in client.chat.completions.create(
        model=selected_model,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # delta.content can be None on some chunks (e.g. the final one)
        token_text = message_chunk.choices[0].delta.content or ""
        response += token_text
        yield response


# Create a Chatbot component with a specified height
chatbot = gr.Chatbot(height=600)

# List of featured models offered in the UI
featured_models = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "gpt2",
    "bert-base-uncased",
    "facebook/bart-base",
    "google/flan-t5-base",
]

# Build the Gradio interface
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    gr.Markdown("# Serverless Text Generation Hub")

    with gr.Tab("Basic Settings"):
        with gr.Row():
            with gr.Column():
                # Textbox for the system message
                system_message = gr.Textbox(value="", label="System message")
        with gr.Row():
            with gr.Column():
                # Featured-model selection with a search filter
                with gr.Accordion("Featured Models", open=True):
                    model_search = gr.Textbox(
                        label="Filter Models",
                        placeholder="Search for a featured model...",
                    )
                    model = gr.Radio(
                        label="Select a model",
                        choices=featured_models,
                        value="meta-llama/Llama-3.3-70B-Instruct",
                    )

                    def filter_models(search_term):
                        # Keep only the models whose names contain the search term
                        filtered_models = [
                            m for m in featured_models
                            if search_term.lower() in m.lower()
                        ]
                        return gr.update(choices=filtered_models)

                    model_search.change(filter_models, inputs=model_search, outputs=model)
        with gr.Row():
            with gr.Column():
                # Free-form override for any model name
                custom_model = gr.Textbox(
                    label="Custom Model",
                    placeholder="Enter a custom model name",
                )

    with gr.Tab("Advanced Settings"):
        with gr.Row():
            max_tokens = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        with gr.Row():
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
            frequency_penalty = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
        with gr.Row():
            seed = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

    with gr.Tab("Information"):
        with gr.Accordion("Featured Models", open=False):
            gr.Markdown(
                """
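The models below are featured in this Space; any other model name can be entered in the Custom Model box under Basic Settings.
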
Model Name | Description |
---|---|
meta-llama/Llama-3.3-70B-Instruct | Highly capable Llama model |
gpt2 | Generative Pre-trained Transformer 2 |
bert-base-uncased | Bidirectional Encoder Representations from Transformers |
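facebook/bart-base | Denoising sequence-to-sequence model |
google/flan-t5-base | Instruction-tuned T5 model |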