|
import gradio as gr |
|
from openai import OpenAI |
|
import os |
|
import nest_asyncio |
|
|
|
# Patch asyncio to tolerate nested event loops — presumably needed because this
# runs in environments (e.g. notebooks) that already own a running loop.
# NOTE(review): confirm this is still required in the target deployment.
nest_asyncio.apply()

# Hugging Face access token read from the environment; None when HF_TOKEN is
# unset, in which case API calls will fail with an auth error at request time.
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference API endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
|
|
|
def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Send a single-turn chat request to the hosted model and return the reply.

    No conversation history is kept: each call sends only the system prompt
    and the current user message.

    Args:
        message: The user's prompt text.
        system_message: System prompt steering the assistant's behavior.
        max_tokens: Upper bound on generated tokens. Gradio sliders deliver
            floats, so this is coerced to int before the API call.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The assistant's reply text, or an ``"Error: ..."`` string on failure.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]

    try:
        print("Making request to API...")
        response = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            messages=messages,
            # Fix: Gradio Slider values arrive as floats (e.g. 512.0); the
            # chat-completions API requires an integer token budget.
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: showing the error
        # text in the output box is friendlier than an unhandled traceback.
        print(f"Error: {str(e)}")
        return f"Error: {str(e)}"
|
|
|
|
|
# Build each input component up front, one per respond() parameter, in the
# exact order the function expects them.
_message_box = gr.Textbox(label="Message", lines=4)
_system_box = gr.Textbox(label="System Message", value="You are a helpful assistant.")
_max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, label="Max Tokens")
_temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
_top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

# Single-turn chat UI: plain-text output, no conversation memory.
iface = gr.Interface(
    fn=respond,
    inputs=[
        _message_box,
        _system_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    outputs="text",
    title="Hermes-3-Llama Chat (No Memory)",
)
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    iface.launch()