File size: 2,536 Bytes
c551206 020a962 1df13e1 c551206 8cd9af7 c551206 1df13e1 c551206 9faed3d 0e16686 9faed3d 8cd9af7 c551206 a9af4d7 c551206 85dbf4a c551206 0e16686 c551206 0e16686 cb4c132 0e16686 8c77830 cb4c132 9faed3d a414401 9faed3d 8c77830 9faed3d 9809955 85dbf4a 9faed3d c551206 9faed3d cb4c132 9faed3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
import requests
import os
import json
# API credentials and endpoint configuration for the NVIDIA Cloud Functions (NVCF) service.
API_KEY = os.getenv('API_KEY')  # read from the environment; None if unset
# Direct-invoke endpoint for one specific hosted function (UUID identifies the model deployment).
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
# Base URL for polling a pending (HTTP 202) request; the request id is appended to it.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
# Shared HTTP headers for every request to the API.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
# Default system prompt shown (and sent) unless the user overrides it in the UI.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
def call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA NVCF chat endpoint and return the reply.

    Args:
        message: The current user message (appended as the final "user" turn).
        history_api: Prior turns as [user_text, assistant_text] pairs.
        system_message: Optional system prompt; omitted from the payload if falsy.
        max_tokens: Maximum tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text, or a fallback error string (in Portuguese)
        when the response contains no choices.

    Raises:
        requests.HTTPError: If the final HTTP response has an error status.
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []
    # BUG FIX: the original used an invalid two-element list comprehension
    # (SyntaxError); flatten each history pair into user/assistant turns.
    for user_msg, assistant_msg in history_api:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    # BUG FIX: the current user message was never included in the payload,
    # so the model never saw the latest turn.
    messages.append({"role": "user", "content": message})
    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }
    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload)
    # NVCF returns 202 while the request is still processing; poll the status
    # endpoint with the request id until a terminal response arrives.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        fetch_url = FETCH_URL_FORMAT + request_id
        response = session.get(fetch_url, headers=headers)
    response.raise_for_status()
    response_body = response.json()
    if response_body.get("choices"):
        assistant_message = response_body["choices"][0]["message"]["content"]
        return assistant_message
    else:
        return "Desculpe, ocorreu um erro ao gerar a resposta."
def chatbot_function(message, history_api, system_message, max_tokens, temperature, top_p):
    """Run one chat turn: query the API, record the exchange, return both.

    Mutates `history_api` in place by appending the new [user, assistant]
    pair, then returns the assistant reply together with the updated history.
    """
    reply = call_nvidia_api(
        message, history_api, system_message, max_tokens, temperature, top_p
    )
    history_api.append([message, reply])
    return reply, history_api
# Gradio input widgets passed to ChatInterface as additional_inputs; their
# values arrive as the trailing arguments of chatbot_function.
system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
# Assemble and launch the Gradio UI.
with gr.Blocks() as demo:
    # Server-side per-session list of [user, assistant] pairs.
    chat_history_state = gr.State([])
    chat_interface = gr.ChatInterface(
        fn=chatbot_function,
        # NOTE(review): `history` is not a documented gr.Chatbot constructor
        # argument — verify this kwarg is accepted by the installed Gradio
        # version, and that chatbot_function's signature matches what
        # ChatInterface passes (message, history, *additional_inputs).
        chatbot=gr.Chatbot(history=chat_history_state),
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 70B Free Demo",
    )
demo.launch()
|