import datetime
import os
from os import getenv

import gradio as gr
import requests

# Endpoint configuration, read once from the environment at import time.
API_URL = getenv('API_URL')
BEARER = getenv('BEARER')

headers = {
    "Authorization": f"Bearer {BEARER}",
    "Content-Type": "application/json"
}

# Status string that marks a scaled-down endpoint, as compared against the
# 'state' field of the status response.
SCALED_TO_ZERO = "scaled to zero"

# Length of the countdown shown after the endpoint leaves SCALED_TO_ZERO.
WARM_UP_PERIOD = datetime.timedelta(minutes=4)

# Upper bound for the status request so a stalled connection cannot block
# the caller forever.
STATUS_TIMEOUT_SECONDS = 10


def call_jais(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable object sent as the request body.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        gr.Error: On any failure. HTTP 5XX responses are reported as the
            endpoint still loading; other HTTP errors and all remaining
            exceptions are reported with the underlying error text.
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload)
        response.raise_for_status()  # Raises HTTPError for 4XX/5XX responses.
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        # A 5XX server error is reported as the endpoint still starting up.
        if 500 <= http_err.response.status_code < 600:
            raise gr.Error(
                "The endpoint is loading, it takes about 4 min from the first call."
            ) from http_err
        raise gr.Error(
            f"An error occurred while processing the request. {http_err}"
        ) from http_err
    except Exception as err:
        raise gr.Error(
            f"Check Inference Endpoint Status. An error occurred while processing the request. {err}"
        ) from err


def generate(prompt: str):
    """Send *prompt* to the endpoint via :func:`call_jais` and return its reply.

    Args:
        prompt: Text placed under the ``'prompt'`` key of the payload; the
            ``'inputs'`` key is always sent as an empty string.

    Returns:
        Whatever :func:`call_jais` returns (the decoded JSON response).

    Raises:
        gr.Error: Propagated from :func:`call_jais`.
    """
    payload = {'inputs': '', 'prompt': prompt}
    return call_jais(payload)


# Global variable to store the previous status and the time when it changed
previous_status = None
status_change_time = None

# Moment the endpoint was last seen leaving SCALED_TO_ZERO; None while no
# warm-up countdown is active.
_wake_up_started_at = None


def check_endpoint_status() -> str:
    """Query the endpoint status URL and return a human-readable summary.

    The URL is read from the ``ENDPOINT_URL`` environment variable and the
    bearer token from ``BEARER``, both at call time. The module-level
    ``previous_status`` and ``status_change_time`` are updated whenever the
    reported state differs from the one seen on the previous call.

    When the state moves away from ``SCALED_TO_ZERO``, a countdown of
    ``WARM_UP_PERIOD`` starts and is appended to the summary on every call
    until it runs out or the endpoint scales to zero again.

    Returns:
        ``"Status: ...\\nMessage: ...\\n<countdown>"`` on success, or
        ``"Failed to get status: ..."`` if the request fails.
    """
    global previous_status, status_change_time, _wake_up_started_at

    api_url = os.getenv("ENDPOINT_URL")
    status_headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {os.getenv("BEARER")}'
    }

    try:
        response = requests.get(
            api_url, headers=status_headers, timeout=STATUS_TIMEOUT_SECONDS
        )
        response.raise_for_status()  # will throw an exception for non-200 status
        data = response.json()

        # Extracting the status information; tolerate a null 'status' field.
        status_info = data.get('status') or {}
        status = status_info.get('state', 'No status found')
        message = status_info.get('message', 'No message found')

        now = datetime.datetime.now()

        # Check if the status has changed
        if status != previous_status:
            # Decide about the countdown BEFORE overwriting previous_status;
            # afterwards the old value is gone and the transition can no
            # longer be detected.
            if status == SCALED_TO_ZERO:
                _wake_up_started_at = None
            elif previous_status == SCALED_TO_ZERO:
                _wake_up_started_at = now
            previous_status = status
            status_change_time = now

        # While inside the warm-up window, report the time that is left.
        countdown_message = ""
        if _wake_up_started_at is not None:
            elapsed_time = now - _wake_up_started_at
            if elapsed_time < WARM_UP_PERIOD:
                remaining_time = WARM_UP_PERIOD - elapsed_time
                countdown_message = f"Countdown: {remaining_time} remaining until fully operational."

        return f"Status: {status}\nMessage: {message}\n{countdown_message}"

    except requests.exceptions.RequestException as e:
        return f"Failed to get status: {str(e)}"