chatbot_endpoints_v1

Runtime error

File size: 2,954 Bytes

import json 
import gradio as gr
import os
import requests

# Endpoint credentials and location are read from the environment; each value
# is None when its variable is not set.
hf_token = os.environ.get('HF_TOKEN')
api_url = os.environ.get('API_URL')

# HTTP headers sent with every request to the endpoint.
headers = {'Content-Type': 'application/json'}

# Text placed inside the <<SYS>> section at the start of every prompt.
system_message = "\nTesting by KelvinLo UD\n"

# Strings handed to the chat interface.
title = "Llama2 70B Chatbot"
description = "\nDemo by Kelvin Lo, UD\n"
# CSS rule that hides elements with the class "toast-wrap".
css = ".toast-wrap { display: none !important } "

# Sample prompts offered by the chat interface.
examples = [
    'Hello there! How are you doing?',
    'Can you explain to me briefly what is Python programming language?',
    'Explain the plot of Cinderella in a sentence.',
    'How many hours does it take a man to eat a Helicopter?',
    "Write a 100-word article on 'Benefits of Open-Source in AI research'",
]


def _build_prompt(message, chatbot):
    """Return the Llama 2 chat prompt for the history plus the new message."""
    # Collect the pieces and join once instead of re-copying a growing string
    # on every turn of the history.
    pieces = [f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "]
    for interaction in chatbot:
        pieces.append(
            str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
        )
    pieces.append(str(message) + " [/INST] ")
    return "".join(pieces)


def _parse_payload(json_line):
    """Return the result dict carried by one JSON line, or None if it has none.

    A list payload is reduced to its first element; a dict payload is used as
    is. Anything else (empty list, scalar, list of non-dicts) gives None.

    Raises:
        json.JSONDecodeError: if ``json_line`` is not valid JSON.
    """
    parsed = json.loads(json_line)
    if isinstance(parsed, list):
        parsed = parsed[0] if parsed else None
    return parsed if isinstance(parsed, dict) else None


def predict(message, chatbot):
    """Send the conversation to the endpoint and stream back the reply.

    Args:
        message: The new user message.
        chatbot: Previous turns; each item is indexed as
            ``(user_text, assistant_text)``.

    Yields:
        The reply text accumulated so far, after each response line that
        carries a string ``generated_text``; or an error message when a
        response line carries an ``error`` entry.
    """
    data = {
        "inputs": _build_prompt(message, chatbot),
        "parameters": {
            "max_new_tokens": 256,
            "do_sample": True,
            "top_p": 0.6,
            "temperature": 0.9,
        },
    }

    # NOTE(review): a tuple ``auth`` makes requests send HTTP Basic credentials
    # (user "Bearer", password = token), not an "Authorization: Bearer" header
    # -- confirm this is what the endpoint expects.
    # The ``with`` block releases the streamed connection even when the
    # generator is abandoned before the response is fully consumed.
    with requests.post(
        api_url,
        headers=headers,
        data=json.dumps(data),
        auth=('Bearer', hf_token),
        stream=True,
    ) as response:
        partial_message = ""
        for line in response.iter_lines():
            if not line:  # filter out keep-alive new lines
                continue

            decoded_line = line.decode('utf-8')
            has_prefix = decoded_line.startswith('data:')
            # Strip the 'data:' prefix when present. Lines without it are
            # still tried as JSON, because a non-streamed reply or an error
            # arrives as a plain JSON body.
            json_line = decoded_line[len('data:'):] if has_prefix else decoded_line

            try:
                json_obj = _parse_payload(json_line)
            except json.JSONDecodeError:
                if has_prefix:
                    gr.Warning(f"This line is not valid JSON: {json_line}")
                else:
                    # Unprefixed non-JSON lines are only logged.
                    print(decoded_line)
                continue

            if json_obj is None:
                gr.Warning(f"This line does not carry a JSON object: {json_line}")
                continue

            print(json_obj)
            generated = json_obj.get('generated_text')
            if isinstance(generated, str):
                partial_message += generated
                yield partial_message
            elif 'error' in json_obj:
                yield f"{json_obj['error']}. Please refresh and try again with an appropriate smaller input prompt."
            else:
                gr.Warning(
                    f"Neither 'generated_text' nor 'error' found in this JSON object: {json_obj}"
                )

# Build the chat UI around predict, enable request queuing and start the app.
# NOTE(review): cache_examples=True presumably makes Gradio call predict on the
# examples ahead of time, so the endpoint would need to be reachable at
# startup -- confirm.
# NOTE(review): check that the installed Gradio version still accepts
# queue(concurrency_count=...).
gr.ChatInterface(
    predict,
    title=title,
    description=description,
    css=css,
    examples=examples,
    cache_examples=True,
).queue(concurrency_count=75).launch()