File size: 1,468 Bytes
47fcff2
a5bb25c
79cade0
771f83c
 
 
79cade0
0c5007d
79cade0
a5bb25c
 
 
 
 
 
79cade0
 
 
 
 
 
a5bb25c
 
 
 
 
18d6e67
a5bb25c
 
 
 
e630485
 
a5bb25c
e630485
a5bb25c
18d6e67
a5bb25c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61abdf6
47fcff2
a5bb25c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
from openai import OpenAI
import os
import nest_asyncio

# nest_asyncio patches asyncio so that an event loop can be re-entered
# while it is already running.
nest_asyncio.apply()

# Token is taken from the HF_TOKEN environment variable (None when unset).
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-style client aimed at the Hugging Face inference endpoint.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)

def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send a single-turn chat request and return the model's reply as text.

    No conversation history is kept: each call sends only the (optional)
    system prompt followed by the current user message.

    Args:
        message: User prompt text.
        system_message: System prompt; left out of the request when blank.
        max_tokens: Upper bound on generated tokens; coerced to ``int``
            before being sent.
        temperature: Sampling temperature, forwarded unchanged.
        top_p: Nucleus-sampling cutoff, forwarded unchanged.

    Returns:
        The reply text (empty string if the reply carries no content), or a
        string starting with ``"Error: "`` when the message is blank or the
        request fails. Exceptions are never propagated to the caller.
    """
    # Reject blank input up front instead of spending an API call on it.
    if not message or not message.strip():
        return "Error: message is empty"

    messages = []
    # Only include a system turn when there is actual system text to send.
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": message})

    try:
        print("Making request to API...")
        response = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            messages=messages,
            # UI number widgets may deliver a float; the token limit must be
            # an integer. A bad value raises here and is reported below.
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )
        # The message content can be absent; keep the return type textual.
        return response.choices[0].message.content or ""
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing.
        print(f"Error: {e}")
        return f"Error: {e}"

def _build_interface():
    """Assemble the single-turn Gradio form that calls ``respond``."""
    message_box = gr.Textbox(label="Message", lines=4)
    system_box = gr.Textbox(
        label="System Message",
        value="You are a helpful assistant.",
    )
    max_tokens_slider = gr.Slider(
        minimum=1, maximum=2048, value=512, label="Max Tokens"
    )
    temperature_slider = gr.Slider(
        minimum=0, maximum=1, value=0.7, label="Temperature"
    )
    top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

    # Input order must match the positional parameters of ``respond``.
    return gr.Interface(
        fn=respond,
        inputs=[
            message_box,
            system_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
        ],
        outputs="text",
        title="Hermes-3-Llama Chat (No Memory)",
    )


# Plain form-style interface: no chat memory is kept between submissions.
iface = _build_interface()

if __name__ == "__main__":
    iface.launch()