import gradio as gr
from huggingface_hub import InferenceClient

# Public model used when the caller supplies no HF token.
DEFAULT_MODEL = "HuggingFaceH4/zephyr-7b-beta"

# Default system prompt shown in the UI (typo fixed: "mideval" -> "medieval").
DEFAULT_SYSTEM_MESSAGE = (
    "You are helpful AI chatbot who responds like a medieval knight who is "
    "extremely polite, noble and funny."
)


def _build_messages(system_message, history, message):
    """Assemble the chat-completion message list.

    Accepts ``history`` in either of Gradio's formats:
    the legacy tuple format ``[(user_msg, assistant_msg), ...]`` or the
    "messages" format ``[{"role": ..., "content": ...}, ...]`` (the
    default in Gradio v5). Empty turns are skipped in both formats.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if isinstance(turn, dict):
            # "messages" format: already a role/content dict.
            if turn.get("content"):
                messages.append(
                    {"role": turn["role"], "content": turn["content"]}
                )
        else:
            # Legacy tuple format: (user_msg, assistant_msg); either half
            # may be empty/None on a partial turn.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """
    Handle chat responses using the Hugging Face Inference API.

    Streams the reply by yielding the accumulated response text after each
    chunk, so Gradio replaces the partial answer in place. Errors are
    yielded as user-visible strings rather than raised.
    """
    # Guard against None from the Textbox components before stripping.
    token = (token or "").strip()
    model = (model or "").strip()

    if token:
        # A token implies a private/specific model: require the name.
        if not model:
            yield "Please provide a model name when using an HF token."
            return
    else:
        # No token: fall back to the public default model; token=None is
        # equivalent to anonymous access for InferenceClient.
        model = DEFAULT_MODEL
        token = None

    try:
        client = InferenceClient(model=model, token=token)
    except Exception as e:
        yield f"Error initializing client: {str(e)}"
        return

    messages = _build_messages(system_message, history, message)

    # Accumulate streamed deltas; each yield re-emits the full text so far.
    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some chunks carry no delta content (e.g. role-only frames).
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {str(e)}"


# Input components
token_input = gr.Textbox(
    type="password",
    label="HF API Token",
    placeholder="hf_XXXXXXXXXXXX",
)
model_input = gr.Textbox(
    label="Model Name (required with HF token)",
    placeholder="Enter model name when using token",
    visible=True,
)

# Chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="HF Text Generation Model Tester",
    description="Add token + model name",
    additional_inputs=[
        token_input,
        model_input,
        gr.Textbox(value=DEFAULT_SYSTEM_MESSAGE, label="System Message"),
        gr.Slider(1, 2048, value=512, label="Max Tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()