File size: 2,536 Bytes
c551206 020a962 1df13e1 c551206 8cd9af7 c551206 1df13e1 c551206 9faed3d 0e16686 9faed3d 8cd9af7 c551206 a9af4d7 c551206 85dbf4a c551206 0e16686 c551206 0e16686 cb4c132 0e16686 8c77830 cb4c132 9faed3d a414401 9faed3d 8c77830 9faed3d 9809955 85dbf4a 9faed3d c551206 9faed3d cb4c132 9faed3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
import requests
import os
import json
# API credentials and endpoint configuration for the NVIDIA Cloud Functions (NVCF) service.
API_KEY = os.getenv('API_KEY')  # read from the environment; None if unset
# Direct-invoke endpoint for one specific hosted function (UUID identifies the model deployment).
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
# Base URL for polling a pending (HTTP 202) request; the request id is appended to it.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
# Shared HTTP headers for every request to the API.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
# Default system prompt shown (and sent) unless the user overrides it in the UI.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
def call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA NVCF chat endpoint and return the reply.

    Args:
        message: The current user message (appended as the final "user" turn).
        history_api: Prior turns as [user_text, assistant_text] pairs.
        system_message: Optional system prompt; omitted from the payload if falsy.
        max_tokens: Maximum tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply text, or a fallback error string (in Portuguese)
        when the response contains no choices.

    Raises:
        requests.HTTPError: If the final HTTP response has an error status.
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []
    # BUG FIX: the original used an invalid two-element list comprehension
    # (SyntaxError); flatten each history pair into user/assistant turns.
    for user_msg, assistant_msg in history_api:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    # BUG FIX: the current user message was never included in the payload,
    # so the model never saw the latest turn.
    messages.append({"role": "user", "content": message})
    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }
    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload)
    # NVCF returns 202 while the request is still processing; poll the status
    # endpoint with the request id until a terminal response arrives.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        fetch_url = FETCH_URL_FORMAT + request_id
        response = session.get(fetch_url, headers=headers)
    response.raise_for_status()
    response_body = response.json()
    if response_body.get("choices"):
        assistant_message = response_body["choices"][0]["message"]["content"]
        return assistant_message
    else:
        return "Desculpe, ocorreu um erro ao gerar a resposta."
def chatbot_function(message, history_api, system_message, max_tokens, temperature, top_p):
    """Run one chat turn: query the API, record the exchange, return both.

    Mutates `history_api` in place by appending the new [user, assistant]
    pair, then returns the assistant reply together with the updated history.
    """
    reply = call_nvidia_api(
        message, history_api, system_message, max_tokens, temperature, top_p
    )
    history_api.append([message, reply])
    return reply, history_api
# Gradio input widgets passed to ChatInterface as additional_inputs; their
# values arrive as the trailing arguments of chatbot_function.
system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
# Assemble and launch the Gradio UI.
with gr.Blocks() as demo:
    # Server-side per-session list of [user, assistant] pairs.
    chat_history_state = gr.State([])
    chat_interface = gr.ChatInterface(
        fn=chatbot_function,
        # NOTE(review): `history` is not a documented gr.Chatbot constructor
        # argument — verify this kwarg is accepted by the installed Gradio
        # version, and that chatbot_function's signature matches what
        # ChatInterface passes (message, history, *additional_inputs).
        chatbot=gr.Chatbot(history=chat_history_state),
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="LLAMA 70B Free Demo",
    )
demo.launch()
|