# NOTE: the three lines below are file-viewer residue, kept as comments so the module parses.
# File size: 1,468 Bytes
# 47fcff2 a5bb25c 79cade0 771f83c 79cade0 0c5007d 79cade0 a5bb25c 79cade0 a5bb25c 18d6e67 a5bb25c e630485 a5bb25c e630485 a5bb25c 18d6e67 a5bb25c 61abdf6 47fcff2 a5bb25c |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
from openai import OpenAI
import os
import nest_asyncio
# Per the nest_asyncio documentation, apply() patches asyncio so that an
# event loop can be used re-entrantly.
nest_asyncio.apply()

# Access token for the inference endpoint, read from the HF_TOKEN
# environment variable (None when the variable is unset).
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-style client aimed at the Hugging Face inference API base URL.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send one user message to the hosted model and return its reply text.

    Every call is independent: only the system prompt and the current user
    message are sent, with no earlier turns.

    Args:
        message: Text sent as the ``user`` turn.
        system_message: Text sent as the ``system`` turn.
        max_tokens: Forwarded unchanged as the request's ``max_tokens``.
        temperature: Forwarded unchanged as the request's ``temperature``.
        top_p: Forwarded unchanged as the request's ``top_p``.

    Returns:
        The ``content`` of the first choice in the response, or a string of
        the form ``"Error: <exception text>"`` if the request raised.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]
    request = {
        "model": "NousResearch/Hermes-3-Llama-3.1-8B",
        "messages": conversation,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        print("Making request to API...")
        completion = client.chat.completions.create(**request)
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # UI boundary: report the failure as the returned text instead of
        # letting the exception propagate to the caller.
        failure = f"Error: {str(e)}"
        print(failure)
        return failure
# Single-shot Gradio form without chat memory: each submission is handled
# by `respond` using only the values currently in the form.
iface = gr.Interface(
    title="Hermes-3-Llama Chat (No Memory)",
    fn=respond,
    outputs="text",
    # The component order mirrors the parameter order of `respond`:
    # message, system_message, max_tokens, temperature, top_p.
    inputs=[
        gr.Textbox(lines=4, label="Message"),
        gr.Textbox(value="You are a helpful assistant.", label="System Message"),
        gr.Slider(label="Max Tokens", value=512, minimum=1, maximum=2048),
        gr.Slider(label="Temperature", value=0.7, minimum=0, maximum=1),
        gr.Slider(label="Top P", value=0.9, minimum=0, maximum=1),
    ],
)
if __name__ == "__main__":
    # Launch the Gradio app only when this file is executed as a script,
    # not when it is imported as a module.
    iface.launch()