import gradio as gr
import requests
import json
import os
API_KEY = os.getenv('API_KEY')
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
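# INVOKE_URL addresses the specific NVCF function for this model; FETCH_URL_FORMAT
# is the status endpoint polled while a queued request is still running.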
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
def clear_chat(chat_history_state, chat_message):
    print("Clearing chat...")
    # Reset the stored history, the visible chat, and the message box.
    return [], [], ''
def user(message, history, system_message=None):
    print(f"User message: {message}")
    history = history or []
    if system_message:  # Check if a system message is provided and should be added
        history.append({"role": "system", "content": system_message})
    history.append({"role": "user", "content": message})
    return history
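# For illustration: user("Hi", [], BASE_SYSTEM_MESSAGE) produces a history like
#   [{"role": "system", "content": "I carefully provide accurate, ..."},
#    {"role": "user", "content": "Hi"}]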
def call_nvidia_api(history, max_tokens, temperature, top_p):
    payload = {
        "messages": history,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }
    print(f"Payload sent: {payload}")  # Log the outgoing payload
    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload)
    # NVCF answers 202 while the request is still queued; poll the status
    # endpoint with the request ID until the final response arrives.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        fetch_url = FETCH_URL_FORMAT + request_id
        response = session.get(fetch_url, headers=headers)
    response.raise_for_status()
    response_body = response.json()
    print(f"Payload received: {response_body}")  # Log the incoming payload
    if response_body["choices"]:
        assistant_message = response_body["choices"][0]["message"]["content"]
        history.append({"role": "assistant", "content": assistant_message})
    return history
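# Hypothetical standalone use of the helper above (assumes API_KEY is set):
#   history = [{"role": "user", "content": "Hello!"}]
#   history = call_nvidia_api(history, max_tokens=256, temperature=0.2, top_p=0.7)
#   print(history[-1]["content"])  # the assistant reply appended in place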
def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
    print("Starting chat...")
    # top_k and repetition_penalty are accepted for interface compatibility but
    # are not part of this endpoint's payload, so they are not forwarded.
    updated_history = call_nvidia_api(history, max_tokens, temperature, top_p)
    return updated_history, ""
# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("LLAMA 2 70B Free Demo")
description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
<strong>Explore the Capabilities of LLAMA 2 70B</strong>
</div>
<p>Llama 2 is a large language AI model capable of generating text and code in response to prompts.
</p>
<p> <strong>How to Use:</strong></p>
<ol>
<li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
<li>Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.</li>
<li>Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.</li>
</ol>
<p> <strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
<p> <strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
<p> <strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>
"""
            gr.Markdown(description)
            # gr.ChatInterface manages its own callback and history, which would
            # bypass the explicit chat_history_state used below (and gr.Container
            # is not a Gradio component), so the chat UI is assembled from a
            # plain Chatbot and Textbox and wired up manually instead.
            chatbot = gr.Chatbot(label="LLAMA 2 70B Chatbot")
            message = gr.Textbox(label="Message", placeholder="Type your message and press Submit.", lines=2)
            with gr.Row():
                submit = gr.Button("Submit")
                clear = gr.Button("🗑️ Clear")
            with gr.Accordion("Additional Inputs", open=False):
                gr.Markdown("Customize the model's behavior using the inputs below.")
                system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
                max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024, interactive=True)
                temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2, interactive=True)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7, interactive=True)
    chat_history_state = gr.State([])
    def update_chatbot(message, chat_history, system_message, max_tokens, temperature, top_p):
        print("Updating chatbot...")
        # Seed the system message only at the start of a new conversation.
        chat_history = user(message, chat_history, system_message if not chat_history else None)
        chat_history, _ = chat(chat_history, system_message, max_tokens, temperature, top_p, 40, 1.1)
        # Render the role/content history as (user, assistant) pairs for the
        # Chatbot component, skipping the system message.
        pairs = []
        for msg in chat_history:
            if msg["role"] == "user":
                pairs.append([msg["content"], None])
            elif msg["role"] == "assistant" and pairs:
                pairs[-1][1] = msg["content"]
        return chat_history, pairs, ""
    submit.click(
        fn=update_chatbot,
        inputs=[message, chat_history_state, system_msg, max_tokens, temperature, top_p],
        outputs=[chat_history_state, chatbot, message]
    )
    message.submit(
        fn=update_chatbot,
        inputs=[message, chat_history_state, system_msg, max_tokens, temperature, top_p],
        outputs=[chat_history_state, chatbot, message]
    )
    clear.click(
        fn=clear_chat,
        inputs=[chat_history_state, message],
        outputs=[chat_history_state, chatbot, message]
    )
demo.launch()