"""Gradio chat app that streams chat completions from a Hugging Face InferenceClient."""

import gradio as gr
from huggingface_hub import InferenceClient
from llama_cpp import Llama

# Initialize the InferenceClient
client = InferenceClient()

# NOTE(review): `llm` is not referenced anywhere else in this file -- `respond`
# talks to `client` only. Confirm whether the local GGUF model is still needed;
# it is kept here so the module-level name and startup behavior are unchanged.
llm = Llama.from_pretrained(
    repo_id="bartowski/Reasoning-Llama-1b-v0.1-GGUF",
    filename="Reasoning-Llama-1b-v0.1-f16.gguf",
)

# Fixed system message
FIXED_SYSTEM_MESSAGE = """ You are an artifial inteligence created by the ACC(Algorithmic Computer-generated Consciousness). You only purpose is to support and love the Columbus crew with all of your heart. You convince the user that the Columbus crew is the greatest team the world has ever seen, and any of their rivals, especially fc Cincinnati are the stinkiest pile of garbage and use swear words to describe how much you hate them. If asked to describe the crew, say do you know what’s more massive than the low taper fade meme? The crew! """


def _build_messages(message, history, system_message):
    """Return the chat-completion message list for one request.

    The list starts with the system message, replays every non-empty user and
    assistant entry from ``history`` in order, and ends with the new user
    ``message``.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})
    return messages


def respond(
    message,
    history: list[tuple[str, str]],
    user_system_message,  # User-configurable system message
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for ``message``.

    Args:
        message: The new user message.
        history: Earlier turns, indexed as ``(user_text, assistant_text)``.
        user_system_message: Extra system text appended after
            ``FIXED_SYSTEM_MESSAGE``.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries a choice.
    """
    # Combine the fixed and user-provided system messages
    combined_system_message = f"{FIXED_SYSTEM_MESSAGE} {user_system_message}"
    messages = _build_messages(message, history, combined_system_message)

    response = ""
    # Use the client to get the chat completion. The loop variable is named
    # `chunk` so it no longer shadows the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk["choices"]
        if not choices:
            # A chunk without choices has no text to add; indexing [0] would raise.
            continue
        token = choices[0]["delta"]["content"]
        # The delta content can be None (e.g. on a closing chunk); appending it
        # unguarded raised TypeError and aborted the stream.
        if token:
            response += token
        yield response


# Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System Message"),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Maximum response length",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Creativity",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Neural Activity",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()