File size: 4,591 Bytes
b94dfba 47fcff2 79cade0 1b9ab22 18d6e67 79cade0 0c5007d 1b9ab22 79cade0 246199f 0cce7a0 79cade0 18d6e67 246199f 79cade0 0cce7a0 efe1573 1b9ab22 47fcff2 18d6e67 1b9ab22 5d4663f 246199f 1b9ab22 efe1573 246199f 3ba965f 246199f 1b9ab22 efe8c50 18d6e67 efe8c50 18d6e67 efe8c50 3ba965f 246199f 18d6e67 efe8c50 3ba965f 18d6e67 a604d22 b94dfba 246199f a604d22 47fcff2 3ba965f b94dfba 3ba965f 246199f 3ba965f 47fcff2 61abdf6 47fcff2 b94dfba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
The error message you're seeing is likely due to the way you're defining and calling the `generate_response` function in your Gradio app.
In Python, a function defined with the `async` keyword is a coroutine: calling it only creates a coroutine object, and you must `await` it (or run it on an event loop) to actually get its result.
In your case, you're defining the `generate_response` function as an `async` function, but you're not using the `await` keyword to call it. Instead, you're passing it as a callback to the `gr.Button` component.
To fix this issue, keep `respond` as an `async` function, but make `generate_response` a regular (synchronous) function that runs the `respond` coroutine to completion on an event loop and returns the result.
Here's an updated version of your code that should work:
```python
import gradio as gr
import os
import openai
import tenacity
ACCESS_TOKEN = os.getenv("HF_TOKEN")
openai.api_key = ACCESS_TOKEN
# Retry transient API failures with exponential backoff (up to 5 attempts).
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
async def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* and return the full reply text.

    Only the system prompt and the current user message are sent — no chat
    history. On an API error a human-readable error string is returned
    instead of raising.
    """
    try:
        # Two-message conversation: system prompt followed by the user turn.
        convo = [{"role": "system", "content": system_message},
                 {"role": "user", "content": message}]
        # NOTE(review): this create() call is synchronous even though the
        # function is async — it blocks the running loop while streaming.
        completion_stream = openai.ChatCompletion.create(
            model="NousResearch/Hermes-3-Llama-3.1-8B",
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            messages=convo,
            stream=True,
        )
        # Gather streamed content deltas, then join them into one string.
        pieces = []
        for part in completion_stream:
            if 'choices' in part:
                first_choice = part['choices'][0]
                if 'delta' in first_choice and 'content' in first_choice['delta']:
                    pieces.append(first_choice['delta']['content'])
        return "".join(pieces)
    except openai.error.APIError as e:
        # The error body may be a structured dict or a plain string.
        details = e.body
        if isinstance(details, dict):
            error_type = details.get("type", "Unknown")
            error_code = details.get("code", "Unknown")
            error_param = details.get("param", "Unknown")
            error_message = details.get("message", "An error occurred.")
            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
        else:
            error_str = f"Error: {details}"
        print(f"APIError: {error_str}")
        return error_str
    except Exception as e:
        print(f"Exception: {e}")
        return "Error occurred. Please try again."
# Gradio callback to handle user input and response generation without history.
def generate_response(message, system_message, max_tokens, temperature, top_p):
    """Synchronous wrapper that drives the async ``respond`` coroutine.

    Gradio invokes this as a plain function; ``asyncio.run`` creates an
    event loop, runs the coroutine to completion, and — unlike the previous
    ``new_event_loop()``/``run_until_complete()`` pattern — also closes the
    loop afterwards, so no event loop is leaked on every button click.
    Returns the response string produced by ``respond``.
    """
    import asyncio
    return asyncio.run(respond(message, system_message, max_tokens, temperature, top_p))
def launch_app():
    """Build and launch the Gradio chat UI (single message in, response out)."""
    try:
        demo = gr.Blocks()
        with demo:
            gr.Markdown("# Chatbot")
            msg_box = gr.Textbox(label="Message")
            sys_box = gr.Textbox(label="System message")
            tok_slider = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
            temp_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
            topp_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
            out_text = gr.Text(label="Response")
            # Wire the button to the stateless generate_response callback.
            gen_btn = gr.Button("Generate Response")
            gen_btn.click(
                generate_response,
                inputs=[msg_box, sys_box, tok_slider, temp_slider, topp_slider],
                outputs=[out_text],
                show_progress=False,
            )
        demo.launch(show_error=True)
    except KeyError as e:
        print(f"Error: {e}")
        print("Please try again.")
# Start the app only when run as a script, not when imported as a module.
if __name__ == "__main__":
    launch_app()
```
This code keeps `respond` as an asynchronous function and defines `generate_response` as a regular function that creates an event loop and uses it to run the `respond` coroutine to completion, returning the final response to Gradio.