File size: 5,075 Bytes
c551206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5b736f
c15a43b
 
 
 
 
 
 
 
 
c551206
 
 
 
 
 
 
 
 
 
 
 
 
 
e5b736f
 
c551206
 
 
 
 
e5b736f
2c5ba3b
e5b736f
 
 
 
 
 
c15a43b
 
 
e5b736f
c15a43b
 
 
 
 
 
 
e5b736f
 
 
 
2c5ba3b
 
e5b736f
c551206
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import gradio as gr
import requests
import json
import os

# Credentials come from the environment; API_KEY is None when the variable is unset.
API_KEY = os.environ.get('API_KEY')

# NVCF endpoints: one to invoke the hosted function, one (a prefix that the
# request id is appended to) to fetch the status of a pending request.
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

# HTTP headers shared by every request sent to the API.
headers = dict(
    [
        ("Authorization", f"Bearer {API_KEY}"),
        ("Accept", "application/json"),
        ("Content-Type", "application/json"),
    ]
)

# Default text pre-filled in the "System Message" textbox of the UI.
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

def clear_chat(chat_history_state, chat_message):
    """Return the values that reset the chat: an empty history and empty text.

    Both arguments are ignored (and left unmodified); they exist only so the
    function can be wired to components that pass their current values in.

    Returns:
        A ``(history, message)`` tuple of ``([], '')``.
    """
    print("Clearing chat...")
    return [], ''

def user(message, history, system_message=None):
    """Append the user's message (and optionally a system message) to the history.

    Args:
        message: Text of the user's turn.
        history: List of ``{"role", "content"}`` dicts; a falsy value
            (``None`` or an empty list) is replaced by a new list.
        system_message: When truthy, a ``"system"`` entry with this content
            is appended immediately before the user entry.

    Returns:
        The conversation list. A non-empty ``history`` is extended in place
        and returned as the same object.
    """
    print(f"User message: {message}")
    conversation = history if history else []
    pending = [{"role": "user", "content": message}]
    if system_message:
        # The system entry goes ahead of the user turn it accompanies.
        pending.insert(0, {"role": "system", "content": system_message})
    conversation.extend(pending)
    return conversation

def call_nvidia_api(history, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA endpoint and append the reply.

    Posts ``history`` with the sampling parameters to ``INVOKE_URL``. While
    the server answers with HTTP 202, the request id from the ``NVCF-REQID``
    response header is used to re-query the status endpoint until a
    different status code is returned.

    Args:
        history: List of ``{"role", "content"}`` message dicts, sent as the
            ``"messages"`` field. It is mutated in place when a reply arrives.
        max_tokens: Value for the ``"max_tokens"`` payload field.
        temperature: Value for the ``"temperature"`` payload field.
        top_p: Value for the ``"top_p"`` payload field.

    Returns:
        The same ``history`` list, with an ``"assistant"`` entry appended
        when the response contains at least one choice.

    Raises:
        requests.HTTPError: If the final response has an error status.
        RuntimeError: If a 202 response carries no ``NVCF-REQID`` header,
            so there is nothing to poll.
    """
    payload = {
        "messages": history,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }

    print(f"Payload enviado: {payload}")  # Log the outgoing payload

    # Use the session as a context manager so its pooled connections are
    # released even when a request or raise_for_status() raises.
    # NOTE(review): no timeout is passed to requests, so a stalled connection
    # blocks this call indefinitely — consider choosing one for this service.
    with requests.Session() as session:
        response = session.post(INVOKE_URL, headers=headers, json=payload)

        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if request_id is None:
                # Without the id the status URL cannot be built; fail with a
                # clear message instead of a TypeError from str + None.
                raise RuntimeError(
                    "Received HTTP 202 without an NVCF-REQID header; cannot poll for the result"
                )
            fetch_url = FETCH_URL_FORMAT + request_id
            response = session.get(fetch_url, headers=headers)

        response.raise_for_status()
        response_body = response.json()

    print(f"Payload recebido: {response_body}")  # Log the received payload

    if response_body["choices"]:
        assistant_message = response_body["choices"][0]["message"]["content"]
        history.append({"role": "assistant", "content": assistant_message})

    return history

def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
    """Run one model turn over ``history`` and return it with an empty string.

    ``system_message``, ``top_k`` and ``repetition_penalty`` are accepted but
    not forwarded; only ``history``, ``max_tokens``, ``temperature`` and
    ``top_p`` reach ``call_nvidia_api``.

    Returns:
        A ``(history, "")`` tuple, where ``history`` is whatever
        ``call_nvidia_api`` returned.
    """
    print("Starting chat...")
    return call_nvidia_api(history, max_tokens, temperature, top_p), ""

def update_chatbot(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Add the user's message to the history and fetch the model's reply.

    The system message is attached only when the conversation is empty, i.e.
    on the first turn; later turns append just the user message.

    Args:
        message: Text of the user's new turn.
        chat_history: List of ``{"role", "content"}`` dicts, or a falsy value
            for a new conversation.
        system_message: System prompt to prepend on the first turn.
        max_tokens: Forwarded to ``call_nvidia_api``.
        temperature: Forwarded to ``call_nvidia_api``.
        top_p: Forwarded to ``call_nvidia_api``.

    Returns:
        The updated history list as returned by ``call_nvidia_api``.
    """
    print("Updating chatbot...")
    # Only a brand-new conversation gets the system message.
    first_turn_system = None if chat_history else system_message
    chat_history = user(message, chat_history, first_turn_system)
    # call_nvidia_api returns the history list itself, not a tuple, so it
    # must not be unpacked into two names.
    chat_history = call_nvidia_api(chat_history, max_tokens, temperature, top_p)
    return chat_history
    
# Gradio interface setup
# Build the Gradio UI: a header row with the title, then the description,
# the input widgets, and the chat interface, all inside one Blocks app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("LLAMA 2 70B Free Demo")
            # HTML description; assigned here but rendered by the
            # gr.Markdown(description) call after the Row/Column close.
            description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
    <strong>Explore the Capabilities of LLAMA 2 70B</strong>
</div>
<p>Llama 2 is a large language AI model capable of generating text and code in response to prompts.
</p>
<p> <strong>How to Use:</strong></p>
<ol>
    <li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
    <li>Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.</li>
    <li>Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.</li>
</ol>
<p> <strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
<p> <strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
<p> <strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>
"""
    gr.Markdown(description)
    # Input widgets: the message box, the system prompt (pre-filled with
    # BASE_SYSTEM_MESSAGE) and the sampling-parameter sliders.
    chatbox = gr.Textbox(label="What do you want to chat about?", placeholder="Ask me anything.", lines=3)
    system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
    max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024, interactive=True)
    temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2, interactive=True)
    top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7, interactive=True)
    # Per-session conversation state, starting as an empty list.
    chat_history_state = gr.State([])

    # Order matches update_chatbot's trailing parameters:
    # (system_message, max_tokens, temperature, top_p).
    additional_inputs = [system_msg, max_tokens, temperature, top_p]

    # NOTE(review): in the Gradio releases I am aware of, gr.ChatInterface
    # does not accept `inputs`/`outputs` keyword arguments, calls `fn` with
    # (message, history, *additional_inputs) using its own history format,
    # and expects `fn` to return the reply text rather than a role/content
    # history list — confirm against the installed Gradio version.
    chatbot = gr.ChatInterface(
        fn=update_chatbot,
        inputs=[chatbox, chat_history_state],
        outputs=chat_history_state,
        additional_inputs=additional_inputs,
        title="LLAMA 2 70B Chatbot",
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
        textbox=chatbox,
    )

    # NOTE(review): presumably meant to reset the state and textbox when the
    # chat is cleared; verify that ChatInterface exposes a `clear` event
    # listener with this signature — it may instead be Blocks.clear(), which
    # takes no such arguments.
    chatbot.clear(
        fn=clear_chat,
        inputs=[chat_history_state, chatbox],
        outputs=[chat_history_state, chatbox]
    )

# Start the web server for the app (runs at import time; there is no
# __main__ guard).
demo.launch()