Spaces:
Sleeping
Sleeping
File size: 7,315 Bytes
eb3516b 379a33b eb3516b b74ae79 eb3516b 62335f3 5d247de eb3516b b74ae79 3a2b82e 46c0344 eb3516b 379a33b 46c0344 eb3516b bfcdf7c 47a10f0 bfcdf7c eb3516b b74ae79 eb3516b bfcdf7c 46c0344 b74ae79 eb3516b bfcdf7c 2b2be0b 379a33b 2b2be0b 379a33b 2b2be0b eb3516b bfcdf7c eb3516b 46c0344 eb3516b 379a33b bfcdf7c 2b2be0b 379a33b 2b2be0b 41ec48f 2b2be0b 379a33b 2b2be0b b74ae79 eb3516b 379a33b eb3516b 2b2be0b eb3516b 1d1c0df eb3516b b74ae79 eb3516b b74ae79 eb3516b 379a33b eb3516b b74ae79 e723c27 379a33b b74ae79 eb3516b b74ae79 eb3516b 379a33b eb3516b 9043986 e723c27 2b2be0b 379a33b 5f52d2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import gradio as gr
import os
import sys
import json
import requests
import random
# Model name sent in every chat-completions request.
MODEL = "gpt-4o-mini"


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is not set, so a misconfigured
            deployment fails at startup with a message naming the variable
            instead of an opaque error on ``None``.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value


# Endpoint the requests are posted to,
# e.g. https://api.openai.com/v1/chat/completions
API_URL = os.getenv("API_URL")

# The app is shown in its "usage limit reached" state only when DISABLED is
# exactly the string 'True'.
DISABLED = os.getenv("DISABLED") == 'True'

# Comma-separated list of API keys; one is picked at random per request.
# Surrounding whitespace and empty entries are dropped so that a value such
# as "key1, key2," does not produce malformed Authorization headers.
# Never print these values: they are secrets.
OPENAI_API_KEYS = [
    key.strip()
    for key in _require_env("OPENAI_API_KEYS").split(',')
    if key.strip()
]
if not OPENAI_API_KEYS:
    raise RuntimeError("OPENAI_API_KEYS does not contain any API key")

print(API_URL)

# Used as the default concurrency limit of the Gradio queue.
NUM_THREADS = int(_require_env("NUM_THREADS"))
print(NUM_THREADS)
def exception_handler(exception_type, exception, traceback):
    """Report an uncaught exception as a single ``TypeName: message`` line.

    Installed as ``sys.excepthook`` below; the ``traceback`` argument is
    accepted to satisfy the hook signature but is deliberately not printed.
    """
    summary = f"{exception_type.__name__}: {exception}"
    print(summary)


# Suppress traceback frames and route uncaught exceptions through the
# one-line reporter above.
sys.tracebacklimit = 0
sys.excepthook = exception_handler
def _build_messages(history, inputs, include_history):
    """Return the ``messages`` list for one chat-completions request.

    ``history`` is a flat list of texts that alternates user / assistant
    turns, starting with a user turn. When ``include_history`` is false only
    the new user message is sent.
    """
    messages = []
    if include_history:
        messages = [
            {"role": "user" if index % 2 == 0 else "assistant", "content": text}
            for index, text in enumerate(history)
        ]
    messages.append({"role": "user", "content": inputs})
    return messages


def _extract_delta_content(line):
    """Return the text fragment carried by one decoded stream line, or None.

    The payload is expected after a 6-character prefix (``data: ``). Lines of
    12 characters or fewer are ignored, which also filters the terminating
    ``data: [DONE]`` marker. Chunks without choices, or whose delta has no
    (or a null) ``content``, yield None instead of raising.
    """
    if len(line) <= 12:
        return None
    event = json.loads(line[6:])
    choices = event.get("choices") or []
    if not choices:
        return None
    return choices[0].get("delta", {}).get("content")


def _chat_pairs(history):
    """Group the flat history into (user, assistant) tuples for the Chatbot."""
    return [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]


def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request:gr.Request):
    """Stream a chat completion for ``inputs`` and yield UI updates.

    Args:
        inputs: text of the new user message.
        top_p: nucleus-sampling value forwarded to the API.
        temperature: sampling temperature forwarded to the API.
        chat_counter: number of turns so far; history is only sent to the
            API when this is non-zero.
        chatbot: current Chatbot value (unused, kept for the event signature).
        history: flat list of alternating user / assistant texts; mutated in
            place with the new turn.
        request: incoming Gradio request; its raw headers are forwarded,
            stringified, in a ``Headers`` request header.

    Yields:
        Tuples of (chat pairs, history, chat_counter, response, textbox
        update, button update). While tokens arrive both widgets are kept
        non-interactive; the final tuple re-enables them. ``response`` is
        None in the final tuple when the request could not be sent at all.
    """
    messages = _build_messages(history, inputs, include_history=chat_counter != 0)
    payload = {
        "model": MODEL,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    # A key is drawn at random per request. It is a secret and must never be
    # written to the logs.
    OPENAI_API_KEY = random.choice(OPENAI_API_KEYS)
    headers_dict = {key.decode('utf-8'): value.decode('utf-8') for key, value in request.headers.raw}
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Headers": f"{headers_dict}"
    }

    chat_counter += 1
    history.append(inputs)
    token_counter = 0
    partial_words = ""
    counter = 0
    # Bound before the try block so the final yield works even when the
    # request itself raises (connection error, invalid URL, ...).
    response = None
    try:
        response = requests.post(API_URL, headers=headers, json=payload, stream=True)
        for chunk in response.iter_lines():
            # The first line of the stream is skipped.
            if counter == 0:
                counter += 1
                continue
            line = chunk.decode()
            if not line:
                continue
            content = _extract_delta_content(line)
            if content is None:
                continue
            partial_words += content
            if token_counter == 0:
                history.append(" " + partial_words)
            else:
                history[-1] = partial_words
            token_counter += 1
            yield _chat_pairs(history), history, chat_counter, response, gr.update(interactive=False), gr.update(interactive=False)  # resembles {chatbot: chat, state: history}
    except Exception as e:
        # Best effort: report the problem and still fall through to the final
        # yield so the input widgets are re-enabled.
        print(f'error found: {e}')
    finally:
        # Release the streaming connection, including when the consumer stops
        # iterating this generator early.
        if response is not None:
            response.close()

    if token_counter == 0:
        # No reply was received. Drop the user turn appended above so that
        # history keeps strictly alternating user / assistant entries;
        # otherwise every later turn would be sent with the wrong roles.
        history.pop()

    yield _chat_pairs(history), history, chat_counter, response, gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "payload": payload, "partial_words": partial_words, "token_counter": token_counter, "counter": counter}))
def reset_textbox():
    """Clear and lock the input textbox and lock the submit button.

    Returns a pair of Gradio updates: the first empties the textbox and makes
    it non-interactive, the second makes the button non-interactive.
    """
    cleared_locked_textbox = gr.update(value='', interactive=False)
    locked_button = gr.update(interactive=False)
    return cleared_locked_textbox, locked_button
# Page heading; replaced by a red usage-limit notice when DISABLED is set.
if DISABLED:
    title = """<h1 align="center" style="color:red">This app has reached OpenAI's usage limit. Please check back tomorrow.</h1>"""
else:
    title = """<h1 align="center">GPT-4o Mini: Research Preview (Short-Term Availability)</h1>"""

# Introductory text. The model name matches MODEL / the page title
# (it previously referred to "gpt-4 turbo").
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of a GPT-4o mini LLM.
"""
theme = gr.themes.Default(primary_hue="green")

# Build the chat UI: heading, chatbot, input box, run button, status box and
# a collapsible panel with the sampling parameters.
with gr.Blocks(css = """#col_container { margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}""",
               theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("""<h3 align="center">This app provides you full access to GPT-4o mini (128K token limit). You don't need any OPENAI API key.</h3>""")

    with gr.Column(elem_id = "col_container", visible=True) as main_block:
        # No API-key field: the key is supplied by the app itself.
        chatbot = gr.Chatbot(elem_id='chatbot')
        inputs = gr.Textbox(placeholder= "Hi there!", label= "Type an input and press Enter")
        # Flat list of alternating user / assistant texts, per session.
        state = gr.State([])
        with gr.Row():
            with gr.Column(scale=7):
                # The run button is hidden when the app is disabled.
                b1 = gr.Button(visible=not DISABLED)
            with gr.Column(scale=3):
                server_status_code = gr.Textbox(label="Status code from OpenAI server", )

        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
            # The chat completions API only accepts temperatures between 0
            # and 2; a larger slider range would let users send requests that
            # the server rejects.
            temperature = gr.Slider(minimum=0, maximum=2.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
            # Hidden turn counter passed to and returned from predict.
            chat_counter = gr.Number(value=0, visible=False, precision=0)

    predict_inputs = [inputs, top_p, temperature, chat_counter, chatbot, state]
    predict_outputs = [chatbot, state, chat_counter, server_status_code, inputs, b1]

    # Pressing Enter in the textbox and clicking the button behave the same:
    # first lock and clear the input immediately (unqueued), then stream the
    # model reply through the queue.
    for register in (inputs.submit, b1.click):
        register(reset_textbox, [], [inputs, b1], queue=False)
        register(predict, predict_inputs, predict_outputs)

demo.queue(max_size=10, default_concurrency_limit=NUM_THREADS, api_open=False).launch(share=False)
|