Spaces:
Runtime error
Runtime error
File size: 2,954 Bytes
b649a34 7c52de8 b649a34 38da75e b649a34 7c52de8 b649a34 6f32094 b649a34 639b64b b649a34 6f32094 639b64b 6f32094 b649a34 6f32094 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import json
import gradio as gr
import os
import requests
# Access token and endpoint URL are read from the environment; each is None
# when its variable is unset.
hf_token = os.environ.get('HF_TOKEN')
api_url = os.environ.get('API_URL')

# Base HTTP headers for the generation request (JSON body).
headers = {'Content-Type': 'application/json'}

# Text placed inside the <<SYS>> section of every prompt.
system_message = "\nTesting by KelvinLo UD\n"

# Page title and blurb for the chat UI.
title = "Llama2 70B Chatbot"
description = "\nDemo by Kelvin Lo, UD\n"

# Stylesheet rule that hides elements with the ``toast-wrap`` class.
css = ".toast-wrap { display: none !important } "

# Sample prompts offered to the user.
examples = [
    "Hello there! How are you doing?",
    "Can you explain to me briefly what is Python programming language?",
    "Explain the plot of Cinderella in a sentence.",
    "How many hours does it take a man to eat a Helicopter?",
    "Write a 100-word article on 'Benefits of Open-Source in AI research'",
]
def _build_prompt(message, chatbot):
    """Join the system message, prior turns and new message into one prompt."""
    parts = [f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "]
    for interaction in chatbot or ():
        parts.append(f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] ")
    parts.append(f"{message} [/INST] ")
    return "".join(parts)


def _extract_payload(parsed):
    """Return the JSON object to inspect, or None when there is none.

    Accepts either a bare object or a list whose first element is the object.
    """
    if isinstance(parsed, list):
        parsed = parsed[0] if parsed else None
    return parsed if isinstance(parsed, dict) else None


def predict(message, chatbot):
    """Send the conversation to the text-generation API and yield the reply.

    Args:
        message: The new user message.
        chatbot: Prior turns; each item is indexed as ``[0]`` (user text) and
            ``[1]`` (assistant text). May be empty.

    Yields:
        The reply accumulated so far after each ``generated_text`` payload, or
        the API's ``error`` text followed by a retry hint.

    Raises:
        requests.RequestException: If the HTTP request itself fails or times
            out.
    """
    data = {
        "inputs": _build_prompt(message, chatbot),
        "parameters": {
            "max_new_tokens": 256,
            "do_sample": True,
            "top_p": 0.6,
            "temperature": 0.9,
        },
    }

    # Send the token as a bearer header. Passing ``auth=('Bearer', token)``
    # makes requests emit HTTP Basic credentials with the user name "Bearer".
    request_headers = dict(headers)
    if hf_token:
        request_headers['Authorization'] = f'Bearer {hf_token}'

    partial_message = ""
    # The context manager releases the streamed connection even when the
    # consumer stops iterating early; the timeout stops a stalled endpoint
    # from holding a worker forever.
    with requests.post(
        api_url,
        headers=request_headers,
        data=json.dumps(data),
        stream=True,
        timeout=(10, 300),
    ) as response:
        for line in response.iter_lines():
            if not line:  # filter out keep-alive new lines
                continue

            decoded_line = line.decode('utf-8', errors='replace')
            # Server-sent-event lines carry a 'data:' prefix; a plain JSON
            # body has none and is parsed as-is.
            is_event_line = decoded_line.startswith('data:')
            json_line = decoded_line[5:] if is_event_line else decoded_line

            try:
                parsed = json.loads(json_line)
            except json.JSONDecodeError:
                if is_event_line:
                    gr.Warning(f"This line is not valid JSON: {json_line}")
                else:
                    # Unprefixed non-JSON lines are only logged.
                    print(decoded_line)
                continue

            json_obj = _extract_payload(parsed)
            if json_obj is None:
                gr.Warning(f"Unexpected JSON payload: {parsed}")
                continue
            print(json_obj)

            generated_text = json_obj.get('generated_text')
            if generated_text is not None:
                partial_message = partial_message + str(generated_text)
                yield partial_message
            elif 'error' in json_obj:
                yield f"{json_obj['error']}. Please refresh and try again with an appropriate smaller input prompt."
            else:
                gr.Warning(f"The key 'generated_text' does not exist in this JSON object: {json_obj}")
# Build the chat UI around ``predict``, enable request queuing, and start
# serving.
(
    gr.ChatInterface(
        predict,
        title=title,
        description=description,
        css=css,
        examples=examples,
        cache_examples=True,
    )
    .queue(concurrency_count=75)
    .launch()
)
|