"""Gradio chat UI that streams replies from the Hugging Face Inference API."""

import gradio as gr
from huggingface_hub import InferenceClient

# Fallback model when neither a token nor a model name is supplied.
DEFAULT_PUBLIC_MODEL = "gpt2"
# Fallback model when a token is supplied but no model name is chosen.
DEFAULT_PRIVATE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"


def _build_messages(system_message, history, message):
    """Return the chat-completion message list for one request.

    The list starts with the system message, replays ``history`` and ends
    with the new user ``message``.  Two history layouts are accepted:

    * ``(user_msg, assistant_msg)`` pairs, where an empty/None side is skipped;
    * ``{"role": ..., "content": ...}`` dicts (Gradio "messages" format),
      where only non-empty ``user``/``assistant`` entries are forwarded and
      their content is passed through unchanged.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
            continue

        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history, token, model, system_message, max_tokens, temperature, top_p):
    """Stream a chat reply from the Hugging Face Inference API.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            role/content dicts.
        token: Hugging Face API token; blank or None means "no token".
        model: Model repository id; blank or None selects a default
            (``DEFAULT_PRIVATE_MODEL`` with a token, ``DEFAULT_PUBLIC_MODEL``
            without one).  An explicitly chosen model is always honoured.
        system_message: Content of the leading system message.
        max_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Yields:
        The response accumulated so far after each streamed chunk, or a single
        ``"Error initializing client: ..."`` / ``"API Error: ..."`` string if
        the client cannot be created or the request fails.
    """
    # Inputs can arrive as None (e.g. a cleared dropdown); treat that as blank.
    token = (token or "").strip()
    model = (model or "").strip()

    # Only fall back to a default when the user did not pick a model, so the
    # selection shown in the UI is the model that is actually queried.
    if not model:
        model = DEFAULT_PRIVATE_MODEL if token else DEFAULT_PUBLIC_MODEL

    # Broad catch is deliberate: this is the UI boundary and any failure is
    # reported to the user as chat text instead of crashing the handler.
    try:
        client = InferenceClient(model=model, token=token or None)
    except Exception as e:
        yield f"Error initializing client: {e}"
        return

    messages = _build_messages(system_message, history, message)

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=int(max_tokens),  # sliders may deliver a float
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some stream chunks carry no choices or an empty delta; skip them.
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    except Exception as e:
        yield f"API Error: {e}"


# Input components
token_input = gr.Textbox(
    type="password",
    label="HF API Token (leave empty for public models)",
    placeholder="hf_XXXXXXXXXXXX",
)

model_input = gr.Dropdown(
    label="Model Name",
    choices=[
        "gpt2",
        "HuggingFaceH4/zephyr-7b-beta",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    value="gpt2",
)

# Chat interface; additional_inputs are passed to respond() after
# (message, history) in the order listed here.
demo = gr.ChatInterface(
    fn=respond,
    title="HF Model Chat Interface",
    description="Enter token for private models or use public models without token",
    additional_inputs=[
        token_input,
        model_input,
        gr.Textbox(value="You are helpful AI.", label="System Message"),
        gr.Slider(1, 2048, value=512, label="Max Tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()