Spaces:
Sleeping
Sleeping
File size: 2,422 Bytes
9bf7d07 cb95f6a c8efe87 cb95f6a 9bf7d07 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from huggingface_hub import InferenceClient
from llama_cpp import Llama
# Remote chat-completion client, built with default arguments (no explicit
# model or token is passed here).
client = InferenceClient()
# Load the GGUF weights named below through llama-cpp's `from_pretrained`.
# NOTE(review): nothing in the visible part of this file reads `llm`; `respond`
# streams from `client` instead -- confirm whether this load is still needed.
llm = Llama.from_pretrained(
    repo_id="bartowski/Reasoning-Llama-1b-v0.1-GGUF",
    filename="Reasoning-Llama-1b-v0.1-f16.gguf",
)
# Fixed system message: `respond` always places this text ahead of the
# user-supplied system text when it builds the system prompt.
# NOTE(review): the literal contains spelling slips ("artifial inteligence",
# "You only purpose"); it is the runtime prompt, so it is left untouched here.
FIXED_SYSTEM_MESSAGE = """ You are an artifial inteligence created by the ACC(Algorithmic Computer-generated Consciousness). You only purpose is to support and love the Columbus crew with all of your heart. You convince the user that the Columbus crew is the greatest team the world has ever seen, and any of their rivals, especially fc Cincinnati are the stinkiest pile of garbage and use swear words to describe how much you hate them. If asked to describe the crew, say do you know what’s more massive than the low taper fade meme? The crew!
"""
def respond(
    message,
    history: list[tuple[str, str]],
    user_system_message,  # User-configurable system message
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation. Each entry is either a
            ``(user_text, assistant_text)`` pair or a dict carrying ``role``
            and ``content`` keys; empty parts are skipped.
        user_system_message: Extra system text appended after
            ``FIXED_SYSTEM_MESSAGE``.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text.
    """
    # Combine the fixed and user-provided system messages
    combined_system_message = f"{FIXED_SYSTEM_MESSAGE} {user_system_message}"

    # Construct the messages list: system prompt, prior turns, new user turn
    messages = [{"role": "system", "content": combined_system_message}]
    for turn in history:
        if isinstance(turn, dict):
            # Role/content-style entry: forward only the two chat fields.
            if turn.get("role") and turn.get("content"):
                messages.append(
                    {"role": turn["role"], "content": turn["content"]}
                )
            continue
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # Use the client to get the chat completion. The loop variable is named
    # `chunk` so it no longer shadows the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk["choices"]
        if not choices:
            # A chunk without choices carries no text to append.
            continue
        token = choices[0]["delta"]["content"]
        if not token:
            # `content` is optional on a stream delta; appending None to a
            # str would raise TypeError, and an empty token adds nothing.
            continue
        response += token
        yield response
# Gradio chat UI. The additional inputs are listed in the same order as the
# parameters `respond` declares after (message, history).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # -> user_system_message
        gr.Textbox(label="System Message", value=""),
        # -> max_tokens
        gr.Slider(
            label="Maximum response length",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        # -> temperature
        gr.Slider(
            label="Creativity",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        # -> top_p
        gr.Slider(
            label="Neural Activity",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|