arabic-RAG / backend /query_llm.py
derek-thomas
Adding better error message, and count down.
944ee1c
raw
history blame
2.76 kB
import datetime
import os
from os import getenv
import gradio as gr
import requests
# Endpoint configuration, read from the environment once at import time.
# Both values are None when the corresponding variable is unset.
API_URL = os.environ.get('API_URL')
BEARER = os.environ.get('BEARER')

# HTTP headers sent with every generation request made by call_jais().
headers = {
    "Authorization": "Bearer {}".format(BEARER),
    "Content-Type": "application/json",
}
def call_jais(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        gr.Error: For any failure. A 5XX HTTP status is reported as the
            endpoint still loading; other HTTP errors and all remaining
            exceptions are reported with the underlying error text.
    """
    try:
        reply = requests.post(API_URL, headers=headers, json=payload)
        # Turn 4XX/5XX responses into requests.exceptions.HTTPError.
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.HTTPError as http_err:
        status_code = http_err.response.status_code
        is_server_error = 500 <= status_code < 600
        if not is_server_error:
            raise gr.Error(f"An error occurred while processing the request. {http_err}")
        # A 5XX answer is treated as "endpoint still starting up".
        raise gr.Error("The endpoint is loading, it takes about 4 min from the first call.")
    except Exception as err:
        raise gr.Error(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
def generate(prompt: str):
    """Send *prompt* to the endpoint via ``call_jais`` and return its reply.

    The request body carries an empty ``'inputs'`` field alongside the
    ``'prompt'`` field.
    """
    return call_jais({'inputs': '', 'prompt': prompt})
# Module-level state shared across calls to check_endpoint_status().
# previous_status: the endpoint state observed on the most recent successful poll.
# status_change_time: when that state was first observed.
previous_status = None
status_change_time = None
# When the endpoint was last seen leaving the "scaled to zero" state. None while
# it is scaled to zero, or when no such transition has been observed yet.
_wake_up_time = None

# Length of the warm-up countdown shown after the endpoint starts waking up.
_WARM_UP_DURATION = datetime.timedelta(minutes=4)
# Seconds to wait for the status API before reporting a failure.
_STATUS_TIMEOUT_SECONDS = 10


def check_endpoint_status():
    """Poll the endpoint status API and return a human-readable summary.

    The URL is read from the ``ENDPOINT_URL`` environment variable and the
    token from ``BEARER``. The summary contains the reported state and
    message, plus a countdown line while the endpoint is within the warm-up
    window that starts when it is seen leaving the "scaled to zero" state.

    Returns:
        ``"Status: ...\\nMessage: ...\\n<countdown>"`` on success (the
        countdown part is empty when no countdown applies), or
        ``"Failed to get status: ..."`` when the request fails.
    """
    global previous_status, status_change_time, _wake_up_time

    api_url = os.getenv("ENDPOINT_URL")
    request_headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {os.getenv("BEARER")}'
    }

    # Only the network call and JSON decoding can raise RequestException, so
    # the try body is limited to them.
    try:
        # A timeout keeps a stalled status API from blocking the caller
        # forever; the resulting Timeout is a RequestException.
        response = requests.get(api_url, headers=request_headers, timeout=_STATUS_TIMEOUT_SECONDS)
        response.raise_for_status()  # raises for 4XX/5XX responses
        data = response.json()
    except requests.exceptions.RequestException as e:
        return f"Failed to get status: {str(e)}"

    # Extract the status information, with placeholders for missing fields.
    status_info = data.get('status', {})
    status = status_info.get('state', 'No status found')
    message = status_info.get('message', 'No message found')

    now = datetime.datetime.now()
    if status != previous_status:
        # The transition must be inspected BEFORE previous_status is
        # overwritten; otherwise "was scaled to zero, now is not" can never
        # be observed and the countdown would never start.
        # NOTE(review): confirm the status API reports exactly the string
        # "scaled to zero" for this state.
        if status == "scaled to zero":
            _wake_up_time = None
        elif previous_status == "scaled to zero":
            _wake_up_time = now
        previous_status = status
        status_change_time = now

    countdown_message = ""
    if _wake_up_time is not None:
        remaining_time = _WARM_UP_DURATION - (now - _wake_up_time)
        if remaining_time > datetime.timedelta(0):
            countdown_message = f"Countdown: {remaining_time} remaining until fully operational."

    return f"Status: {status}\nMessage: {message}\n{countdown_message}"