File size: 3,549 Bytes
47fcff2 79cade0 1b9ab22 18d6e67 771f83c 79cade0 0c5007d 1b9ab22 79cade0 246199f 0cce7a0 79cade0 18d6e67 246199f 79cade0 0cce7a0 efe1573 1b9ab22 47fcff2 18d6e67 1b9ab22 5d4663f 246199f 1b9ab22 efe1573 246199f 3ba965f 246199f 1b9ab22 efe8c50 18d6e67 efe8c50 18d6e67 efe8c50 3ba965f 246199f 18d6e67 efe8c50 3ba965f 18d6e67 a604d22 b94dfba 246199f a604d22 47fcff2 3ba965f b94dfba 3ba965f 246199f 3ba965f 47fcff2 61abdf6 47fcff2 96700cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import os
import openai
import tenacity
import nest_asyncio

# Allow nested event loops: Gradio runs its own loop, and
# generate_response() spins up another via run_until_complete().
nest_asyncio.apply()

# Hugging Face token doubles as the API key for the openai client.
# NOTE(review): presumably the client's api_base is pointed at an
# HF-compatible endpoint somewhere outside this file — confirm, since
# only the key is set here.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
openai.api_key = ACCESS_TOKEN
# Retry logic with tenacity for handling API rate limits
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
async def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a single chat completion for *message*.

    Only the system message and the current user message are sent — no
    conversation history. The completion is consumed as a stream and the
    tokens are concatenated into one string.

    NOTE: despite being ``async``, this function contains no ``await``;
    the openai call below is blocking and runs synchronously.

    Returns:
        The assistant's reply as a string, or a human-readable error
        string if the API call fails.
    """
    try:
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": message},
        ]
        stream = openai.ChatCompletion.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
            stream=True,
        )
        # Accumulate tokens in a list and join once at the end —
        # repeated `response += token` is quadratic on long replies.
        parts = []
        for chunk in stream:
            # A chunk can carry an *empty* choices list; the old
            # `'choices' in chunk` guard still allowed an IndexError.
            choices = chunk.get("choices") or []
            if choices:
                token = choices[0].get("delta", {}).get("content")
                if token:
                    parts.append(token)
        return "".join(parts)
    except openai.error.APIError as e:
        # Handle both string and dict types of error bodies
        error_details = e.body
        if isinstance(error_details, dict):
            error_type = error_details.get("type", "Unknown")
            error_code = error_details.get("code", "Unknown")
            error_param = error_details.get("param", "Unknown")
            error_message = error_details.get("message", "An error occurred.")
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = f"Error: {error_details}"
        print(f"APIError: {error_str}")
        return error_str
    except Exception as e:
        # Broad fallback so the UI always gets *some* string back.
        print(f"Exception: {e}")
        return "Error occurred. Please try again."
# Gradio function to handle user input and response generation without history
def generate_response(message, system_message, max_tokens, temperature, top_p):
    """Synchronous Gradio wrapper that drives the async ``respond``.

    Creates a private event loop, runs the coroutine to completion, and
    returns its string result.
    """
    import asyncio
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(
            respond(message, system_message, max_tokens, temperature, top_p)
        )
    finally:
        # Original leaked one event loop per click; close it explicitly.
        loop.close()
def launch_app():
    """Assemble the Gradio chat UI and start serving it."""
    try:
        with gr.Blocks() as demo:
            gr.Markdown("# Chatbot")
            # Inputs: free-text message plus sampling controls.
            message = gr.Textbox(label="Message")
            system_message = gr.Textbox(label="System message")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
            response = gr.Text(label="Response")
            # Single-turn generation — no chat history is threaded through.
            generate_btn = gr.Button("Generate Response")
            generate_btn.click(
                generate_response,
                inputs=[message, system_message, max_tokens, temperature, top_p],
                outputs=[response],
                show_progress=False,
            )
        demo.launch(show_error=True)
    except KeyError as e:
        print(f"Error: {e}")
        print("Please try again.")
# Script entry point. (A stray trailing "|" extraction artifact was
# removed from the original last line — it was a syntax error.)
if __name__ == "__main__":
    launch_app()