# Scraped file-viewer header (not part of the program):
# Jyothikamalesh's picture · Update app.py · a5bb25c verified ·
# raw · history blame · 1.47 kB
import gradio as gr
from openai import OpenAI
import os
import nest_asyncio
nest_asyncio.apply()
# Access token read from the HF_TOKEN environment variable
# (None when the variable is not set).
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI client pointed at the Hugging Face inference endpoint instead of
# the default OpenAI base URL.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
def respond(
    message: str,
    system_message: str,
    max_tokens: float,
    temperature: float,
    top_p: float,
) -> str:
    """Send one single-turn chat request and return the model's reply.

    No conversation history is kept: every call sends only the system
    prompt and the current user message.

    Args:
        message: The user's message.
        system_message: The system prompt sent ahead of the user message.
        max_tokens: Upper bound on generated tokens. Coerced to ``int``
            before the request; ``None`` is forwarded unchanged.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The reply text (``""`` if the API returns no content), or a string
        of the form ``"Error: <details>"`` if anything fails. Exceptions
        are never propagated to the caller.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]
    try:
        # Callers (UI sliders, API clients) may hand over a float or a
        # numeric string; normalise to an integer before any network call
        # so a bad value is reported without hitting the API.
        token_limit = None if max_tokens is None else int(max_tokens)

        print("Making request to API...")
        response = client.chat.completions.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            messages=messages,
            max_tokens=token_limit,
            temperature=temperature,
            top_p=top_p,
        )
        reply = response.choices[0].message.content
        # Keep the return type a plain string even when content is missing.
        return reply if reply is not None else ""
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of
        # letting the exception escape.
        error_text = f"Error: {str(e)}"
        print(error_text)
        return error_text
# Single-turn Gradio form around `respond`: each submission is sent on its
# own, with no chat memory carried between requests.
iface = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Message", lines=4),
        gr.Textbox(label="System Message", value="You are a helpful assistant."),
        # Explicit step=1 so every integer token count in the range is
        # selectable. NOTE(review): without `step`, Gradio appears to derive
        # one from the slider range — confirm against the installed version.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P"),
    ],
    outputs="text",
    title="Hermes-3-Llama Chat (No Memory)",
)

# Start the web UI only when the file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()