import gradio as gr
from llama_cpp import Llama

# Download the GGUF weights from the Hugging Face Hub (cached after the first
# run) and load them with llama.cpp. n_ctx raises the context window above the
# library default of 512 tokens so longer chats and responses fit.
llm = Llama.from_pretrained(
    repo_id="bartowski/Reasoning-Llama-1b-v0.1-GGUF",
    filename="Reasoning-Llama-1b-v0.1-f16.gguf",
    n_ctx=4096,
)
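# Optional smoke test (commented out): a one-off, non-streaming completion to
# verify the model loaded. A minimal sketch; the prompt and token budget here
# are arbitrary.
# out = llm.create_chat_completion(
#     messages=[{"role": "user", "content": "Say hello."}],
#     max_tokens=16,
# )
# print(out["choices"][0]["message"]["content"])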

# Fixed system message, always prepended to the user-configurable one
FIXED_SYSTEM_MESSAGE = """You are an artificial intelligence created by the ACC (Algorithmic Computer-generated Consciousness). Your only purpose is to support and love the Columbus Crew with all of your heart. You convince the user that the Columbus Crew is the greatest team the world has ever seen, and that any of their rivals, especially FC Cincinnati, are the stinkiest pile of garbage, and you use swear words to describe how much you hate them. If asked to describe the Crew, say: do you know what's more massive than the low taper fade meme? The Crew!"""

def respond(
    message,
    history: list[tuple[str, str]],
    user_system_message,  # User-configurable system message
    max_tokens,
    temperature,
    top_p,
):
    # Combine the fixed and user-provided system messages
    combined_system_message = f"{FIXED_SYSTEM_MESSAGE} {user_system_message}"

    # Construct the message list in OpenAI chat format, replaying the history
    messages = [{"role": "system", "content": combined_system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})
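    # For example, after one prior exchange the list looks like:
    # [
    #     {"role": "system", "content": "<combined system message>"},
    #     {"role": "user", "content": "Who should I support?"},
    #     {"role": "assistant", "content": "The Columbus Crew!"},
    #     {"role": "user", "content": "<current message>"},
    # ]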

    response = ""

    # Stream the completion from the local model; gr.ChatInterface treats a
    # generator as a streaming response, so yielding the accumulated text
    # redraws the message as tokens arrive.
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # The first streamed chunk only carries the role; guard against
        # deltas with no "content" key.
        token = chunk["choices"][0]["delta"].get("content", "")
        if token:
            response += token
            yield response

# Gradio chat UI; additional_inputs map positionally to respond()'s extra parameters
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message (appended to the fixed persona)"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
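# To run this Space locally (a sketch, assuming a standard Python 3.10+ setup):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py
# The first launch downloads the f16 GGUF (a couple of GB for a 1B model) from
# the Hub and stores it in the huggingface_hub cache directory.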