File size: 2,755 Bytes
944ee1c
91f49a8
8b15eea
 
9a4e478
 
8b15eea
 
 
 
 
9a4e478
 
 
8b15eea
5f311b3
8b15eea
5f311b3
 
 
 
 
944ee1c
 
 
 
 
5f311b3
f54d972
5f311b3
8b15eea
 
9a4e478
8b15eea
91f49a8
 
 
944ee1c
 
 
 
 
91f49a8
944ee1c
 
91f49a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944ee1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91f49a8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import datetime
import os
from os import getenv

import gradio as gr
import requests

# Endpoint address and access token, read from the environment once at import
# time. Either value is None when its variable is not set.
API_URL = os.getenv('API_URL')
BEARER = os.getenv('BEARER')

# Headers sent with every inference request: bearer-token auth, JSON body.
headers = {
    "Authorization": f"Bearer {BEARER}",
    "Content-Type": "application/json",
}


def call_jais(payload, *, timeout=(10, 300)):
    """POST ``payload`` to the inference endpoint and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body. It is sent to ``API_URL``
            together with the module-level ``headers``.
        timeout: Forwarded unchanged to ``requests.post``. Either a single
            number of seconds, a ``(connect, read)`` tuple, or ``None`` to
            wait indefinitely. A finite default keeps a stalled endpoint
            from blocking the caller forever.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        gr.Error: For every failure. A 5XX response is reported as the
            endpoint still loading; other HTTP errors and any other exception
            (connection failure, timeout, undecodable body, ...) carry the
            underlying error text. The original exception is chained as the
            cause.
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status codes
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        failed_response = http_err.response
        # A 5XX server error is treated as "endpoint is still starting up".
        if failed_response is not None and 500 <= failed_response.status_code < 600:
            raise gr.Error(
                "The endpoint is loading, it takes about 4 min from the first call."
            ) from http_err
        raise gr.Error(
            f"An error occurred while processing the request. {http_err}"
        ) from http_err
    except Exception as err:
        # Boundary handler: everything else is surfaced to the UI as a gr.Error.
        raise gr.Error(
            f"Check Inference Endpoint Status. An error occurred while processing the request. {err}"
        ) from err


def generate(prompt: str):
    """Submit ``prompt`` through ``call_jais`` and hand back its result.

    The request body carries the prompt under ``'prompt'`` alongside an empty
    ``'inputs'`` field. Whatever ``call_jais`` returns is returned unchanged,
    and any error it raises propagates to the caller.
    """
    return call_jais({'inputs': '', 'prompt': prompt})


# Module-level state shared between calls to check_endpoint_status():
#   previous_status       - the most recently observed endpoint state
#   status_change_time    - when that state was first observed
#   _status_before_change - the state the endpoint was in just before that
#                           change; needed to tell whether the endpoint has
#                           just left the 'scaled to zero' state
previous_status = None
status_change_time = None
_status_before_change = None

# How long the countdown runs after the endpoint leaves 'scaled to zero'.
_WARM_UP_PERIOD = datetime.timedelta(minutes=4)
# Seconds to wait for the status API before giving up.
_STATUS_REQUEST_TIMEOUT = 10


def check_endpoint_status():
    """Query the endpoint's status API and return a human-readable summary.

    The URL is read from the ``ENDPOINT_URL`` environment variable and the
    token from ``BEARER``. The reply is expected to be a JSON object with a
    ``status`` object holding ``state`` and ``message``; missing fields fall
    back to placeholder text.

    When the state changes from ``"scaled to zero"`` to anything else, a
    countdown line is appended for the following four minutes, measured from
    the moment the change was first observed by this function.

    Returns:
        ``"Status: ...\\nMessage: ...\\n<countdown or empty>"`` on success, or
        ``"Failed to get status: <error>"`` if the request fails or the body
        is not valid JSON.
    """
    global previous_status, status_change_time, _status_before_change

    api_url = os.getenv("ENDPOINT_URL")
    # Named differently from the module-level ``headers`` to avoid shadowing it.
    request_headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {os.getenv("BEARER")}',
    }

    try:
        response = requests.get(
            api_url, headers=request_headers, timeout=_STATUS_REQUEST_TIMEOUT
        )
        response.raise_for_status()  # raises for 4XX/5XX status codes
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions whose
        # decode error does not derive from RequestException.
        return f"Failed to get status: {str(e)}"

    # Extract the status information, tolerating a missing or non-object field.
    status_info = data.get('status') if isinstance(data, dict) else None
    if not isinstance(status_info, dict):
        status_info = {}
    status = status_info.get('state', 'No status found')
    message = status_info.get('message', 'No message found')

    now = datetime.datetime.now()

    # On a state change, remember the state being left *before* overwriting
    # previous_status; otherwise the countdown condition below can never hold.
    if status != previous_status:
        _status_before_change = previous_status
        previous_status = status
        status_change_time = now

    # If the endpoint was 'scaled to zero' before the latest change and no
    # longer is, show how much of the warm-up period is left.
    # NOTE(review): confirm that "scaled to zero" is the exact state string
    # the status API returns.
    countdown_message = ""
    if (
        status_change_time
        and _status_before_change == "scaled to zero"
        and status != "scaled to zero"
    ):
        elapsed_time = now - status_change_time
        if elapsed_time < _WARM_UP_PERIOD:
            remaining_time = _WARM_UP_PERIOD - elapsed_time
            countdown_message = f"Countdown: {remaining_time} remaining until fully operational."

    return f"Status: {status}\nMessage: {message}\n{countdown_message}"