import gradio as gr
from openai import OpenAI
import os

# --------------------------------------------------------------------------------
# Serverless-TextGen-Hub
# This application is a Gradio-based UI for text generation using
# Hugging Face's serverless Inference API. We also incorporate features
# inspired by the ImgGen-Hub, such as:
#  - A "Featured Models" accordion with text filtering.
#  - A "Custom Model" textbox for specifying a non-featured model.
#  - An "Information" tab with accordions for "Featured Models" and
#    "Parameters Overview" containing helpful user guides.
# --------------------------------------------------------------------------------

# Retrieve the access token from environment variables.
# HF_TOKEN is your Hugging Face Inference API key.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Initialize the OpenAI client with the Hugging Face Inference API endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    # NEW inputs for model selection
    model_search,
    selected_model,
    custom_model,
):
    """
    This function handles the chatbot response.

    Parameters:
    - message: The user's newest message (string).
    - history: The list of previous messages in the conversation,
      each as a tuple (user_msg, assistant_msg).
    - system_message: The system prompt provided.
    - max_tokens: The maximum number of tokens to generate in the response.
    - temperature: Sampling temperature (float).
    - top_p: Top-p (nucleus) sampling (float).
    - frequency_penalty: Penalize repeated tokens in the output (float).
    - seed: A fixed seed for reproducibility; -1 means 'random'.
    - model_search: The text used to filter the "Featured Models" Radio button
      list (unused here directly, but updated by the UI).
    - selected_model: The model selected via the "Featured Models" Radio button.
    - custom_model: If not empty, overrides selected_model with this custom path.
    """
    # DEBUG LOGGING
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Model search text: {model_search}")
    print(f"Selected featured model: {selected_model}")
    print(f"Custom model (overrides if not empty): {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Determine the final model name to use:
    # if the custom_model textbox is non-empty, it takes priority;
    # otherwise we use the model selected via the Radio buttons.
    if custom_model.strip():
        model_to_use = custom_model.strip()
    else:
        model_to_use = selected_model

    # Construct the messages array required by the OpenAI-like HF API
    messages = [{"role": "system", "content": system_message}]  # System prompt

    # Add conversation history to context
    for val in history:
        user_part = val[0]
        assistant_part = val[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Start with an empty string to build the response as tokens stream in
    response = ""
    print(f"Using model: {model_to_use}")
    print("Sending request to OpenAI API...")

    # Make the streaming request to the HF Inference API via the OpenAI-like
    # client. We pass 'model_to_use' instead of a hard-coded model.
    for message_chunk in client.chat.completions.create(
        model=model_to_use,  # <-- model is now dynamically selected
        max_tokens=max_tokens,
        stream=True,  # Stream the response
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        # Extract the token text from the chunk. delta.content can be None
        # (e.g. in the final chunk), so fall back to an empty string to avoid
        # a TypeError when concatenating.
        token_text = message_chunk.choices[0].delta.content or ""
        response += token_text
        # As we get new tokens, we stream them back to the user
        yield response

    print("Completed response generation.")


# Create a Chatbot component with a specified height
chatbot = gr.Chatbot(height=600)

# ------------------------------------------------------------
# Below: We define the UI with additional features integrated.
# We'll replicate some of the style from the ImgGen-Hub code:
#  - A "Featured Models" accordion with the ability to filter
#  - A "Custom Model" text box
#  - An "Information" tab with a "Featured Models" table and
#    a "Parameters Overview" containing markdown descriptions.
# ------------------------------------------------------------

# List of placeholder "Featured Models" for demonstration
featured_models_list = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Llama-2-70B-chat-hf",
    "meta-llama/Llama-2-13B-chat-hf",
    "bigscience/bloom",
    "google/flan-t5-xxl",
]


# This function filters the models in featured_models_list based on user input
def filter_models(search_term):
    """
    Filters featured_models_list based on the text in 'search_term'.
""" filtered = [m for m in featured_models_list if search_term.lower() in m.lower()] return gr.update(choices=filtered) print("Initializing Gradio interface...") # Debug log # We build a custom Blocks layout to incorporate tabs and advanced UI elements with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo: # Top-level heading for clarity gr.Markdown("# Serverless-TextGen-Hub\nA Comprehensive UI for Text Generation") with gr.Tab("Chat"): # We'll place the ChatInterface within this tab # Create the additional UI elements in a collapsible or visible layout with gr.Accordion("Featured Models", open=False): with gr.Row(): model_search = gr.Textbox( label="Filter Models", placeholder="Search for a featured model...", lines=1, ) with gr.Row(): model_radio = gr.Radio( label="Select a featured model below", choices=featured_models_list, value="meta-llama/Llama-3.3-70B-Instruct", interactive=True, ) # On change of model_search, we update the radio choices model_search.change( filter_models, inputs=model_search, outputs=model_radio ) # Textbox for specifying a custom model that overrides the featured selection if not empty custom_model = gr.Textbox( label="Custom Model Path (overrides Featured Models if not empty)", placeholder="e.g. meta-llama/Llama-2-13B-chat-hf", lines=1 ) # Build the chat interface itself # We'll pass "model_search", "model_radio", and "custom_model" as additional inputs # so that the 'respond' function can see them and decide which model to use chatbot_interface = gr.ChatInterface( fn=respond, # The function that generates the text additional_inputs=[ gr.Textbox( value="You are a helpful AI assistant.", label="System message", lines=2 ), # system_message gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"), # max_tokens gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), # temperature gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05,label="Top-P"), # top_p gr.Slider( minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty" ), # frequency_penalty gr.Slider( minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)" ), # seed model_search, # Exposed but won't be typed into during conversation, model_radio, custom_model ], chatbot=chatbot, title="Serverless-TextGen-Hub", # The fill_height ensures the chat area expands fill_height=True ) # A new tab for "Information" about Featured Models and Parameters with gr.Tab("Information"): gr.Markdown("## Learn More About the Parameters and Models") # Accordion for "Featured Models" with gr.Accordion("Featured Models (WiP)", open=False): gr.HTML( """
                <p>
                    Below is a small table of example models. In practice, you can
                    pick from thousands of available text generation models on
                    Hugging Face. Use the <b>Filter Models</b> box under the
                    <b>Featured Models</b> accordion in the Chat tab to search by
                    name, or enter a <b>Custom Model</b> path.
                </p>
                <table style="width:100%; text-align:left;">
                    <tr>
                        <th>Model Name</th>
                        <th>Is It Large?</th>
                        <th>Notes</th>
                    </tr>
                    <tr>
                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
                        <td>Yes</td>
                        <td>Placeholder example</td>
                    </tr>
                    <tr>
                        <td>meta-llama/Llama-2-13B-chat-hf</td>
                        <td>Medium</td>
                        <td>Placeholder example</td>
                    </tr>
                    <tr>
                        <td>google/flan-t5-xxl</td>
                        <td>Yes</td>
                        <td>Placeholder example</td>
                    </tr>
                </table>
                <p>
                    Other featured options include
                    <code>meta-llama/Llama-2-70B-chat-hf</code> or
                    <code>bigscience/bloom</code>.
                </p>
                """
            )
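
        # The header comment promises a "Parameters Overview" accordion alongside
        # "Featured Models", but the original code never builds one. Below is a
        # minimal sketch; the wording of each guide is illustrative (not taken
        # from the original app) and describes the sliders defined in the Chat tab.
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                **Max new tokens**: the upper bound on how many tokens the model
                may generate for a single reply.

                **Temperature**: sampling temperature; lower values make output
                more deterministic, higher values more varied.

                **Top-P**: nucleus sampling; the model samples only from the
                smallest set of tokens whose cumulative probability reaches P.

                **Frequency Penalty**: positive values penalize tokens that have
                already appeared, reducing repetition.

                **Seed**: a fixed seed makes generations reproducible; -1 asks
                the backend to pick a random seed.
                """
            )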
print("Gradio interface initialized.")
# ------------------------------------------------------------
# Finally, we launch the app if the script is run directly.
# ------------------------------------------------------------
if __name__ == "__main__":
print("Launching the demo application...")
    demo.launch()