import gradio as gr
from openai import OpenAI
import os
import edge_tts
import tempfile

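# Retro "old TV terminal" styling: black background, green phosphor text,
# and a monospace font throughout.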
css = '''
.gradio-container {
    max-width: 1000px !important;
    background-color: #000 !important;
    color: #0f0 !important;
    font-family: monospace !important;
    padding: 20px !important;
    border-radius: 5px !important;
    border: 10px solid #333 !important;
    box-shadow: 0 0 20px #0f0 !important;
}

h1 {
    text-align: center;
    color: #0f0 !important;
    text-shadow: 0 0 5px #0f0 !important;
}

footer {
    visibility: hidden;
}

textarea, input, .output {
    background-color: #000 !important;
    color: #0f0 !important;
    border: 1px solid #0f0 !important;
    font-family: monospace !important;
}

button {
    background-color: #0f0 !important;
    color: #000 !important;
    border: none !important;
    font-family: monospace !important;
}

button:hover {
    background-color: #090 !important;
}

.audio {
    width: 100%;
    margin-top: 20px;
}
'''

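# The Hugging Face Inference API exposes an OpenAI-compatible endpoint, so the
# official openai client can talk to it using an HF token as the API key.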
ACCESS_TOKEN = os.getenv("HF_TOKEN")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

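# Streams the chat completion token by token, then synthesizes the finished
# reply to audio as a final step.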
async def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

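    # Replay the earlier turns so the model sees the full conversation.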
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""
    
    for chunk in client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        # Deltas can arrive with None content (e.g. the final chunk), so
        # guard before concatenating.
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

    # Convert the finished response to speech with Edge TTS. edge-tts writes
    # MP3 by default, so the temp file suffix should match.
    communicate = edge_tts.Communicate(response)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    # Yield the text response together with the audio path so the second value
    # reaches the gr.Audio component declared in additional_outputs.
    yield response, tmp_path

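# ChatInterface maps additional_inputs onto respond()'s extra parameters and
# routes the yielded audio path to the gr.Audio component below.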
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="", label="System message", lines=2),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
    ],
    css=css,
    title="Old TV Terminal Chat",
    description="Welcome to the Old TV Terminal. Type your message below.",
    additional_outputs=[gr.Audio(label="Generated Speech", autoplay=True)]
)

if __name__ == "__main__":
    demo.launch()